{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.59651035986914, "eval_steps": 100, "global_step": 4400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0003, "loss": 9.4785, "step": 10 }, { "epoch": 0.04, "learning_rate": 0.00029934354485776804, "loss": 4.3277, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.00029868708971553606, "loss": 3.2452, "step": 30 }, { "epoch": 0.09, "learning_rate": 0.00029803063457330413, "loss": 3.0373, "step": 40 }, { "epoch": 0.11, "learning_rate": 0.0002973741794310722, "loss": 2.9545, "step": 50 }, { "epoch": 0.13, "learning_rate": 0.0002967177242888403, "loss": 2.9837, "step": 60 }, { "epoch": 0.15, "learning_rate": 0.0002960612691466083, "loss": 2.9667, "step": 70 }, { "epoch": 0.17, "learning_rate": 0.00029540481400437636, "loss": 3.0758, "step": 80 }, { "epoch": 0.2, "learning_rate": 0.00029474835886214443, "loss": 2.9736, "step": 90 }, { "epoch": 0.22, "learning_rate": 0.00029409190371991245, "loss": 2.9534, "step": 100 }, { "epoch": 0.22, "eval_cer": 1.0, "eval_loss": 2.95331072807312, "eval_runtime": 78.5573, "eval_samples_per_second": 25.141, "eval_steps_per_second": 1.578, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.24, "learning_rate": 0.0002934354485776805, "loss": 2.9745, "step": 110 }, { "epoch": 0.26, "learning_rate": 0.00029277899343544854, "loss": 2.9693, "step": 120 }, { "epoch": 0.28, "learning_rate": 0.0002921225382932166, "loss": 3.0029, "step": 130 }, { "epoch": 0.31, "learning_rate": 0.0002914660831509847, "loss": 2.9767, "step": 140 }, { "epoch": 0.33, "learning_rate": 0.0002908096280087527, "loss": 2.9339, "step": 150 }, { "epoch": 0.35, "learning_rate": 0.00029015317286652077, "loss": 2.9611, "step": 160 }, { "epoch": 0.37, "learning_rate": 0.0002894967177242888, "loss": 2.9468, "step": 170 }, { "epoch": 0.39, "learning_rate": 0.00028884026258205685, "loss": 2.9734, "step": 180 }, { "epoch": 0.41, "learning_rate": 0.0002881838074398249, "loss": 2.9493, "step": 190 }, { "epoch": 0.44, "learning_rate": 0.00028752735229759294, "loss": 2.933, "step": 200 }, { "epoch": 0.44, "eval_cer": 1.0, "eval_loss": 2.923107147216797, "eval_runtime": 76.0631, "eval_samples_per_second": 25.965, "eval_steps_per_second": 1.63, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.46, "learning_rate": 0.000286870897155361, "loss": 2.9433, "step": 210 }, { "epoch": 0.48, "learning_rate": 0.0002862144420131291, "loss": 2.9201, "step": 220 }, { "epoch": 0.5, "learning_rate": 0.0002855579868708971, "loss": 2.9529, "step": 230 }, { "epoch": 0.52, "learning_rate": 0.00028490153172866517, "loss": 2.9164, "step": 240 }, { "epoch": 0.55, "learning_rate": 0.00028424507658643324, "loss": 2.9435, "step": 250 }, { "epoch": 0.57, "learning_rate": 0.0002835886214442013, "loss": 2.9255, "step": 260 }, { "epoch": 0.59, "learning_rate": 0.00028293216630196933, "loss": 2.9487, "step": 270 }, { "epoch": 0.61, "learning_rate": 0.0002822757111597374, "loss": 2.9766, "step": 280 }, { "epoch": 0.63, "learning_rate": 0.00028161925601750547, "loss": 2.9314, "step": 290 }, { "epoch": 0.65, "learning_rate": 0.0002809628008752735, "loss": 2.904, "step": 300 }, { "epoch": 0.65, "eval_cer": 1.0, "eval_loss": 2.885084629058838, "eval_runtime": 75.9215, "eval_samples_per_second": 26.014, "eval_steps_per_second": 1.633, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.68, "learning_rate": 0.00028030634573304156, "loss": 2.9118, "step": 310 }, { "epoch": 0.7, "learning_rate": 0.00027964989059080963, "loss": 2.8872, "step": 320 }, { "epoch": 0.72, "learning_rate": 0.00027899343544857764, "loss": 2.8687, "step": 330 }, { "epoch": 0.74, "learning_rate": 0.0002783369803063457, "loss": 2.8685, "step": 340 }, { "epoch": 0.76, "learning_rate": 0.0002776805251641138, "loss": 2.8346, "step": 350 }, { "epoch": 0.79, "learning_rate": 0.0002770240700218818, "loss": 2.8082, "step": 360 }, { "epoch": 0.81, "learning_rate": 0.0002763676148796499, "loss": 2.7622, "step": 370 }, { "epoch": 0.83, "learning_rate": 0.00027571115973741794, "loss": 2.6548, "step": 380 }, { "epoch": 0.85, "learning_rate": 0.00027505470459518596, "loss": 2.525, "step": 390 }, { "epoch": 0.87, "learning_rate": 0.00027439824945295403, "loss": 2.3607, "step": 400 }, { "epoch": 0.87, "eval_cer": 0.6798586572438162, "eval_loss": 2.154635190963745, "eval_runtime": 76.1981, "eval_samples_per_second": 25.919, "eval_steps_per_second": 1.627, "eval_wer": 0.9976168184526343, "step": 400 }, { "epoch": 0.89, "learning_rate": 0.00027374179431072205, "loss": 2.1658, "step": 410 }, { "epoch": 0.92, "learning_rate": 0.0002730853391684901, "loss": 1.9075, "step": 420 }, { "epoch": 0.94, "learning_rate": 0.0002724288840262582, "loss": 1.8073, "step": 430 }, { "epoch": 0.96, "learning_rate": 0.0002717724288840262, "loss": 1.655, "step": 440 }, { "epoch": 0.98, "learning_rate": 0.0002711159737417943, "loss": 1.5691, "step": 450 }, { "epoch": 1.0, "learning_rate": 0.00027045951859956235, "loss": 1.406, "step": 460 }, { "epoch": 1.03, "learning_rate": 0.00026980306345733037, "loss": 1.3484, "step": 470 }, { "epoch": 1.05, "learning_rate": 0.00026914660831509844, "loss": 1.3013, "step": 480 }, { "epoch": 1.07, "learning_rate": 0.0002684901531728665, "loss": 1.2271, "step": 490 }, { "epoch": 1.09, "learning_rate": 0.0002678336980306345, "loss": 1.1725, "step": 500 }, { "epoch": 1.09, "eval_cer": 0.26647526501766783, "eval_loss": 0.9899328947067261, "eval_runtime": 76.6334, "eval_samples_per_second": 25.772, "eval_steps_per_second": 1.618, "eval_wer": 0.6191165205549408, "step": 500 }, { "epoch": 1.11, "learning_rate": 0.0002671772428884026, "loss": 1.1775, "step": 510 }, { "epoch": 1.13, "learning_rate": 0.00026652078774617067, "loss": 1.1966, "step": 520 }, { "epoch": 1.16, "learning_rate": 0.00026586433260393874, "loss": 1.032, "step": 530 }, { "epoch": 1.18, "learning_rate": 0.00026520787746170675, "loss": 1.059, "step": 540 }, { "epoch": 1.2, "learning_rate": 0.0002645514223194748, "loss": 1.089, "step": 550 }, { "epoch": 1.22, "learning_rate": 0.0002638949671772429, "loss": 1.0714, "step": 560 }, { "epoch": 1.24, "learning_rate": 0.0002632385120350109, "loss": 1.0671, "step": 570 }, { "epoch": 1.26, "learning_rate": 0.000262582056892779, "loss": 1.0015, "step": 580 }, { "epoch": 1.29, "learning_rate": 0.00026192560175054705, "loss": 1.0036, "step": 590 }, { "epoch": 1.31, "learning_rate": 0.00026126914660831507, "loss": 0.9865, "step": 600 }, { "epoch": 1.31, "eval_cer": 0.2125530035335689, "eval_loss": 0.806027352809906, "eval_runtime": 76.3842, "eval_samples_per_second": 25.856, "eval_steps_per_second": 1.623, "eval_wer": 0.5063835220018725, "step": 600 }, { "epoch": 1.33, "learning_rate": 0.00026061269146608314, "loss": 0.9341, "step": 610 }, { "epoch": 1.35, "learning_rate": 0.0002599562363238512, "loss": 0.9844, "step": 620 }, { "epoch": 1.37, "learning_rate": 0.00025929978118161923, "loss": 0.9056, "step": 630 }, { "epoch": 1.4, "learning_rate": 0.0002586433260393873, "loss": 0.946, "step": 640 }, { "epoch": 1.42, "learning_rate": 0.0002579868708971553, "loss": 0.9063, "step": 650 }, { "epoch": 1.44, "learning_rate": 0.0002573304157549234, "loss": 0.8988, "step": 660 }, { "epoch": 1.46, "learning_rate": 0.00025667396061269146, "loss": 0.8914, "step": 670 }, { "epoch": 1.48, "learning_rate": 0.0002560175054704595, "loss": 0.8457, "step": 680 }, { "epoch": 1.5, "learning_rate": 0.00025536105032822754, "loss": 0.9111, "step": 690 }, { "epoch": 1.53, "learning_rate": 0.00025470459518599556, "loss": 0.8959, "step": 700 }, { "epoch": 1.53, "eval_cer": 0.19803886925795053, "eval_loss": 0.713085949420929, "eval_runtime": 77.034, "eval_samples_per_second": 25.638, "eval_steps_per_second": 1.61, "eval_wer": 0.4606775044684654, "step": 700 }, { "epoch": 1.55, "learning_rate": 0.00025404814004376363, "loss": 0.8875, "step": 710 }, { "epoch": 1.57, "learning_rate": 0.0002533916849015317, "loss": 0.8102, "step": 720 }, { "epoch": 1.59, "learning_rate": 0.0002527352297592998, "loss": 0.8105, "step": 730 }, { "epoch": 1.61, "learning_rate": 0.0002520787746170678, "loss": 0.8125, "step": 740 }, { "epoch": 1.64, "learning_rate": 0.00025142231947483586, "loss": 0.7744, "step": 750 }, { "epoch": 1.66, "learning_rate": 0.00025076586433260393, "loss": 0.9137, "step": 760 }, { "epoch": 1.68, "learning_rate": 0.00025010940919037195, "loss": 0.833, "step": 770 }, { "epoch": 1.7, "learning_rate": 0.00024945295404814, "loss": 0.7844, "step": 780 }, { "epoch": 1.72, "learning_rate": 0.0002487964989059081, "loss": 0.8361, "step": 790 }, { "epoch": 1.74, "learning_rate": 0.0002481400437636761, "loss": 0.7743, "step": 800 }, { "epoch": 1.74, "eval_cer": 0.17991166077738516, "eval_loss": 0.666309654712677, "eval_runtime": 77.8448, "eval_samples_per_second": 25.371, "eval_steps_per_second": 1.593, "eval_wer": 0.4369733594348455, "step": 800 }, { "epoch": 1.77, "learning_rate": 0.0002474835886214442, "loss": 0.809, "step": 810 }, { "epoch": 1.79, "learning_rate": 0.00024682713347921225, "loss": 0.7796, "step": 820 }, { "epoch": 1.81, "learning_rate": 0.0002461706783369803, "loss": 0.7802, "step": 830 }, { "epoch": 1.83, "learning_rate": 0.00024551422319474834, "loss": 0.7669, "step": 840 }, { "epoch": 1.85, "learning_rate": 0.0002448577680525164, "loss": 0.8044, "step": 850 }, { "epoch": 1.88, "learning_rate": 0.0002442013129102845, "loss": 0.8241, "step": 860 }, { "epoch": 1.9, "learning_rate": 0.0002435448577680525, "loss": 0.7402, "step": 870 }, { "epoch": 1.92, "learning_rate": 0.00024288840262582054, "loss": 0.7299, "step": 880 }, { "epoch": 1.94, "learning_rate": 0.0002422319474835886, "loss": 0.7455, "step": 890 }, { "epoch": 1.96, "learning_rate": 0.00024157549234135668, "loss": 0.7805, "step": 900 }, { "epoch": 1.96, "eval_cer": 0.16825088339222616, "eval_loss": 0.6159283518791199, "eval_runtime": 76.6296, "eval_samples_per_second": 25.773, "eval_steps_per_second": 1.618, "eval_wer": 0.3996510341305643, "step": 900 }, { "epoch": 1.98, "learning_rate": 0.0002409190371991247, "loss": 0.8158, "step": 910 }, { "epoch": 2.01, "learning_rate": 0.00024026258205689277, "loss": 0.7092, "step": 920 }, { "epoch": 2.03, "learning_rate": 0.0002396061269146608, "loss": 0.6462, "step": 930 }, { "epoch": 2.05, "learning_rate": 0.00023894967177242885, "loss": 0.6201, "step": 940 }, { "epoch": 2.07, "learning_rate": 0.00023829321663019693, "loss": 0.6306, "step": 950 }, { "epoch": 2.09, "learning_rate": 0.00023763676148796497, "loss": 0.6428, "step": 960 }, { "epoch": 2.12, "learning_rate": 0.000236980306345733, "loss": 0.6456, "step": 970 }, { "epoch": 2.14, "learning_rate": 0.00023632385120350106, "loss": 0.6512, "step": 980 }, { "epoch": 2.16, "learning_rate": 0.00023566739606126913, "loss": 0.6547, "step": 990 }, { "epoch": 2.18, "learning_rate": 0.0002350109409190372, "loss": 0.6562, "step": 1000 }, { "epoch": 2.18, "eval_cer": 0.1536660777385159, "eval_loss": 0.618610680103302, "eval_runtime": 76.2421, "eval_samples_per_second": 25.904, "eval_steps_per_second": 1.626, "eval_wer": 0.3705421738020257, "step": 1000 }, { "epoch": 2.2, "learning_rate": 0.00023435448577680521, "loss": 0.678, "step": 1010 }, { "epoch": 2.22, "learning_rate": 0.00023369803063457329, "loss": 0.604, "step": 1020 }, { "epoch": 2.25, "learning_rate": 0.00023304157549234136, "loss": 0.6403, "step": 1030 }, { "epoch": 2.27, "learning_rate": 0.00023238512035010937, "loss": 0.5697, "step": 1040 }, { "epoch": 2.29, "learning_rate": 0.00023172866520787744, "loss": 0.6814, "step": 1050 }, { "epoch": 2.31, "learning_rate": 0.00023107221006564551, "loss": 0.6109, "step": 1060 }, { "epoch": 2.33, "learning_rate": 0.00023041575492341353, "loss": 0.6573, "step": 1070 }, { "epoch": 2.36, "learning_rate": 0.0002297592997811816, "loss": 0.682, "step": 1080 }, { "epoch": 2.38, "learning_rate": 0.00022910284463894965, "loss": 0.6031, "step": 1090 }, { "epoch": 2.4, "learning_rate": 0.00022844638949671772, "loss": 0.6223, "step": 1100 }, { "epoch": 2.4, "eval_cer": 0.14960247349823322, "eval_loss": 0.5698063969612122, "eval_runtime": 76.2271, "eval_samples_per_second": 25.909, "eval_steps_per_second": 1.627, "eval_wer": 0.3551791641841859, "step": 1100 }, { "epoch": 2.42, "learning_rate": 0.00022778993435448576, "loss": 0.5628, "step": 1110 }, { "epoch": 2.44, "learning_rate": 0.0002271334792122538, "loss": 0.6137, "step": 1120 }, { "epoch": 2.46, "learning_rate": 0.00022647702407002188, "loss": 0.601, "step": 1130 }, { "epoch": 2.49, "learning_rate": 0.0002258205689277899, "loss": 0.6086, "step": 1140 }, { "epoch": 2.51, "learning_rate": 0.00022516411378555796, "loss": 0.6377, "step": 1150 }, { "epoch": 2.53, "learning_rate": 0.00022450765864332603, "loss": 0.6022, "step": 1160 }, { "epoch": 2.55, "learning_rate": 0.00022385120350109405, "loss": 0.6024, "step": 1170 }, { "epoch": 2.57, "learning_rate": 0.00022319474835886212, "loss": 0.6133, "step": 1180 }, { "epoch": 2.6, "learning_rate": 0.0002225382932166302, "loss": 0.5927, "step": 1190 }, { "epoch": 2.62, "learning_rate": 0.00022188183807439824, "loss": 0.5627, "step": 1200 }, { "epoch": 2.62, "eval_cer": 0.14463780918727914, "eval_loss": 0.5555065870285034, "eval_runtime": 77.0134, "eval_samples_per_second": 25.645, "eval_steps_per_second": 1.61, "eval_wer": 0.33722018895225125, "step": 1200 }, { "epoch": 2.64, "learning_rate": 0.00022122538293216628, "loss": 0.6592, "step": 1210 }, { "epoch": 2.66, "learning_rate": 0.00022056892778993432, "loss": 0.6241, "step": 1220 }, { "epoch": 2.68, "learning_rate": 0.0002199124726477024, "loss": 0.6223, "step": 1230 }, { "epoch": 2.7, "learning_rate": 0.00021925601750547044, "loss": 0.6151, "step": 1240 }, { "epoch": 2.73, "learning_rate": 0.00021859956236323848, "loss": 0.6498, "step": 1250 }, { "epoch": 2.75, "learning_rate": 0.00021794310722100655, "loss": 0.6824, "step": 1260 }, { "epoch": 2.77, "learning_rate": 0.00021728665207877462, "loss": 0.6187, "step": 1270 }, { "epoch": 2.79, "learning_rate": 0.00021663019693654264, "loss": 0.6401, "step": 1280 }, { "epoch": 2.81, "learning_rate": 0.0002159737417943107, "loss": 0.5839, "step": 1290 }, { "epoch": 2.84, "learning_rate": 0.00021531728665207878, "loss": 0.5476, "step": 1300 }, { "epoch": 2.84, "eval_cer": 0.1416166077738516, "eval_loss": 0.5435045957565308, "eval_runtime": 76.2684, "eval_samples_per_second": 25.895, "eval_steps_per_second": 1.626, "eval_wer": 0.3306664396969955, "step": 1300 }, { "epoch": 2.86, "learning_rate": 0.0002146608315098468, "loss": 0.561, "step": 1310 }, { "epoch": 2.88, "learning_rate": 0.00021400437636761487, "loss": 0.6238, "step": 1320 }, { "epoch": 2.9, "learning_rate": 0.0002133479212253829, "loss": 0.6401, "step": 1330 }, { "epoch": 2.92, "learning_rate": 0.00021269146608315096, "loss": 0.5713, "step": 1340 }, { "epoch": 2.94, "learning_rate": 0.00021203501094091903, "loss": 0.6343, "step": 1350 }, { "epoch": 2.97, "learning_rate": 0.00021137855579868707, "loss": 0.5926, "step": 1360 }, { "epoch": 2.99, "learning_rate": 0.00021072210065645514, "loss": 0.6025, "step": 1370 }, { "epoch": 3.01, "learning_rate": 0.00021006564551422316, "loss": 0.5069, "step": 1380 }, { "epoch": 3.03, "learning_rate": 0.00020940919037199123, "loss": 0.4978, "step": 1390 }, { "epoch": 3.05, "learning_rate": 0.0002087527352297593, "loss": 0.5002, "step": 1400 }, { "epoch": 3.05, "eval_cer": 0.14363957597173144, "eval_loss": 0.530446469783783, "eval_runtime": 76.7632, "eval_samples_per_second": 25.728, "eval_steps_per_second": 1.615, "eval_wer": 0.3393054728061963, "step": 1400 }, { "epoch": 3.08, "learning_rate": 0.00020809628008752732, "loss": 0.4814, "step": 1410 }, { "epoch": 3.1, "learning_rate": 0.0002074398249452954, "loss": 0.482, "step": 1420 }, { "epoch": 3.12, "learning_rate": 0.00020678336980306346, "loss": 0.5199, "step": 1430 }, { "epoch": 3.14, "learning_rate": 0.00020612691466083147, "loss": 0.4718, "step": 1440 }, { "epoch": 3.16, "learning_rate": 0.00020547045951859955, "loss": 0.5226, "step": 1450 }, { "epoch": 3.18, "learning_rate": 0.0002048140043763676, "loss": 0.5668, "step": 1460 }, { "epoch": 3.21, "learning_rate": 0.00020415754923413566, "loss": 0.5, "step": 1470 }, { "epoch": 3.23, "learning_rate": 0.0002035010940919037, "loss": 0.5437, "step": 1480 }, { "epoch": 3.25, "learning_rate": 0.00020284463894967175, "loss": 0.504, "step": 1490 }, { "epoch": 3.27, "learning_rate": 0.00020218818380743982, "loss": 0.5174, "step": 1500 }, { "epoch": 3.27, "eval_cer": 0.14854240282685513, "eval_loss": 0.5377296805381775, "eval_runtime": 76.3889, "eval_samples_per_second": 25.855, "eval_steps_per_second": 1.623, "eval_wer": 0.33568814367180183, "step": 1500 }, { "epoch": 3.29, "learning_rate": 0.00020153172866520784, "loss": 0.4864, "step": 1510 }, { "epoch": 3.32, "learning_rate": 0.0002008752735229759, "loss": 0.5128, "step": 1520 }, { "epoch": 3.34, "learning_rate": 0.00020021881838074398, "loss": 0.5356, "step": 1530 }, { "epoch": 3.36, "learning_rate": 0.000199562363238512, "loss": 0.512, "step": 1540 }, { "epoch": 3.38, "learning_rate": 0.00019890590809628006, "loss": 0.4907, "step": 1550 }, { "epoch": 3.4, "learning_rate": 0.00019824945295404814, "loss": 0.4675, "step": 1560 }, { "epoch": 3.42, "learning_rate": 0.00019759299781181618, "loss": 0.5075, "step": 1570 }, { "epoch": 3.45, "learning_rate": 0.00019693654266958422, "loss": 0.4871, "step": 1580 }, { "epoch": 3.47, "learning_rate": 0.0001962800875273523, "loss": 0.5308, "step": 1590 }, { "epoch": 3.49, "learning_rate": 0.00019562363238512034, "loss": 0.4745, "step": 1600 }, { "epoch": 3.49, "eval_cer": 0.13403710247349823, "eval_loss": 0.528931736946106, "eval_runtime": 76.9163, "eval_samples_per_second": 25.677, "eval_steps_per_second": 1.612, "eval_wer": 0.31321814622521066, "step": 1600 }, { "epoch": 3.51, "learning_rate": 0.00019496717724288838, "loss": 0.5471, "step": 1610 }, { "epoch": 3.53, "learning_rate": 0.00019431072210065642, "loss": 0.4837, "step": 1620 }, { "epoch": 3.56, "learning_rate": 0.0001936542669584245, "loss": 0.5438, "step": 1630 }, { "epoch": 3.58, "learning_rate": 0.00019299781181619254, "loss": 0.4934, "step": 1640 }, { "epoch": 3.6, "learning_rate": 0.00019234135667396058, "loss": 0.5325, "step": 1650 }, { "epoch": 3.62, "learning_rate": 0.00019168490153172865, "loss": 0.4638, "step": 1660 }, { "epoch": 3.64, "learning_rate": 0.00019102844638949672, "loss": 0.5239, "step": 1670 }, { "epoch": 3.66, "learning_rate": 0.00019037199124726474, "loss": 0.5334, "step": 1680 }, { "epoch": 3.69, "learning_rate": 0.0001897155361050328, "loss": 0.5197, "step": 1690 }, { "epoch": 3.71, "learning_rate": 0.00018905908096280086, "loss": 0.5239, "step": 1700 }, { "epoch": 3.71, "eval_cer": 0.13953180212014135, "eval_loss": 0.5112127661705017, "eval_runtime": 76.4879, "eval_samples_per_second": 25.821, "eval_steps_per_second": 1.621, "eval_wer": 0.32394246318835646, "step": 1700 }, { "epoch": 3.73, "learning_rate": 0.0001884026258205689, "loss": 0.4774, "step": 1710 }, { "epoch": 3.75, "learning_rate": 0.00018774617067833697, "loss": 0.5223, "step": 1720 }, { "epoch": 3.77, "learning_rate": 0.00018708971553610501, "loss": 0.5327, "step": 1730 }, { "epoch": 3.79, "learning_rate": 0.00018643326039387309, "loss": 0.4677, "step": 1740 }, { "epoch": 3.82, "learning_rate": 0.0001857768052516411, "loss": 0.4709, "step": 1750 }, { "epoch": 3.84, "learning_rate": 0.00018512035010940917, "loss": 0.5068, "step": 1760 }, { "epoch": 3.86, "learning_rate": 0.00018446389496717724, "loss": 0.4975, "step": 1770 }, { "epoch": 3.88, "learning_rate": 0.00018380743982494526, "loss": 0.5317, "step": 1780 }, { "epoch": 3.9, "learning_rate": 0.00018315098468271333, "loss": 0.4874, "step": 1790 }, { "epoch": 3.93, "learning_rate": 0.0001824945295404814, "loss": 0.5115, "step": 1800 }, { "epoch": 3.93, "eval_cer": 0.13419611307420495, "eval_loss": 0.5079012513160706, "eval_runtime": 76.8322, "eval_samples_per_second": 25.705, "eval_steps_per_second": 1.614, "eval_wer": 0.3093880330240872, "step": 1800 }, { "epoch": 3.95, "learning_rate": 0.00018183807439824942, "loss": 0.4567, "step": 1810 }, { "epoch": 3.97, "learning_rate": 0.0001811816192560175, "loss": 0.4669, "step": 1820 }, { "epoch": 3.99, "learning_rate": 0.00018052516411378556, "loss": 0.5232, "step": 1830 }, { "epoch": 4.01, "learning_rate": 0.0001798687089715536, "loss": 0.4515, "step": 1840 }, { "epoch": 4.03, "learning_rate": 0.00017921225382932165, "loss": 0.4392, "step": 1850 }, { "epoch": 4.06, "learning_rate": 0.0001785557986870897, "loss": 0.4071, "step": 1860 }, { "epoch": 4.08, "learning_rate": 0.00017789934354485776, "loss": 0.4555, "step": 1870 }, { "epoch": 4.1, "learning_rate": 0.0001772428884026258, "loss": 0.39, "step": 1880 }, { "epoch": 4.12, "learning_rate": 0.00017658643326039385, "loss": 0.4553, "step": 1890 }, { "epoch": 4.14, "learning_rate": 0.00017592997811816192, "loss": 0.4471, "step": 1900 }, { "epoch": 4.14, "eval_cer": 0.13007950530035337, "eval_loss": 0.5131427645683289, "eval_runtime": 76.8195, "eval_samples_per_second": 25.71, "eval_steps_per_second": 1.614, "eval_wer": 0.2964933185803047, "step": 1900 }, { "epoch": 4.17, "learning_rate": 0.00017527352297592994, "loss": 0.4744, "step": 1910 }, { "epoch": 4.19, "learning_rate": 0.000174617067833698, "loss": 0.4303, "step": 1920 }, { "epoch": 4.21, "learning_rate": 0.00017396061269146608, "loss": 0.4447, "step": 1930 }, { "epoch": 4.23, "learning_rate": 0.00017330415754923412, "loss": 0.4543, "step": 1940 }, { "epoch": 4.25, "learning_rate": 0.00017264770240700217, "loss": 0.4768, "step": 1950 }, { "epoch": 4.27, "learning_rate": 0.00017199124726477024, "loss": 0.475, "step": 1960 }, { "epoch": 4.3, "learning_rate": 0.00017133479212253828, "loss": 0.4539, "step": 1970 }, { "epoch": 4.32, "learning_rate": 0.00017067833698030632, "loss": 0.42, "step": 1980 }, { "epoch": 4.34, "learning_rate": 0.00017002188183807437, "loss": 0.4361, "step": 1990 }, { "epoch": 4.36, "learning_rate": 0.00016936542669584244, "loss": 0.4455, "step": 2000 }, { "epoch": 4.36, "eval_cer": 0.12781802120141342, "eval_loss": 0.5015448331832886, "eval_runtime": 76.7477, "eval_samples_per_second": 25.734, "eval_steps_per_second": 1.616, "eval_wer": 0.2930887735126394, "step": 2000 }, { "epoch": 4.38, "learning_rate": 0.00016870897155361048, "loss": 0.4328, "step": 2010 }, { "epoch": 4.41, "learning_rate": 0.00016805251641137853, "loss": 0.4189, "step": 2020 }, { "epoch": 4.43, "learning_rate": 0.0001673960612691466, "loss": 0.4207, "step": 2030 }, { "epoch": 4.45, "learning_rate": 0.00016673960612691467, "loss": 0.4453, "step": 2040 }, { "epoch": 4.47, "learning_rate": 0.00016608315098468268, "loss": 0.4352, "step": 2050 }, { "epoch": 4.49, "learning_rate": 0.00016542669584245076, "loss": 0.4666, "step": 2060 }, { "epoch": 4.51, "learning_rate": 0.00016477024070021883, "loss": 0.4031, "step": 2070 }, { "epoch": 4.54, "learning_rate": 0.00016411378555798684, "loss": 0.3793, "step": 2080 }, { "epoch": 4.56, "learning_rate": 0.00016345733041575491, "loss": 0.4319, "step": 2090 }, { "epoch": 4.58, "learning_rate": 0.00016280087527352296, "loss": 0.4199, "step": 2100 }, { "epoch": 4.58, "eval_cer": 0.12986749116607774, "eval_loss": 0.49543923139572144, "eval_runtime": 76.5642, "eval_samples_per_second": 25.795, "eval_steps_per_second": 1.62, "eval_wer": 0.2962379777002298, "step": 2100 }, { "epoch": 4.6, "learning_rate": 0.00016214442013129103, "loss": 0.4658, "step": 2110 }, { "epoch": 4.62, "learning_rate": 0.00016148796498905907, "loss": 0.446, "step": 2120 }, { "epoch": 4.65, "learning_rate": 0.00016083150984682712, "loss": 0.3957, "step": 2130 }, { "epoch": 4.67, "learning_rate": 0.0001601750547045952, "loss": 0.4544, "step": 2140 }, { "epoch": 4.69, "learning_rate": 0.0001595185995623632, "loss": 0.4609, "step": 2150 }, { "epoch": 4.71, "learning_rate": 0.00015886214442013127, "loss": 0.4175, "step": 2160 }, { "epoch": 4.73, "learning_rate": 0.00015820568927789935, "loss": 0.4258, "step": 2170 }, { "epoch": 4.75, "learning_rate": 0.00015754923413566736, "loss": 0.4329, "step": 2180 }, { "epoch": 4.78, "learning_rate": 0.00015689277899343543, "loss": 0.4624, "step": 2190 }, { "epoch": 4.8, "learning_rate": 0.0001562363238512035, "loss": 0.4699, "step": 2200 }, { "epoch": 4.8, "eval_cer": 0.12684628975265017, "eval_loss": 0.48266056180000305, "eval_runtime": 76.2619, "eval_samples_per_second": 25.898, "eval_steps_per_second": 1.626, "eval_wer": 0.2890458762447868, "step": 2200 }, { "epoch": 4.82, "learning_rate": 0.00015557986870897155, "loss": 0.454, "step": 2210 }, { "epoch": 4.84, "learning_rate": 0.0001549234135667396, "loss": 0.433, "step": 2220 }, { "epoch": 4.86, "learning_rate": 0.00015426695842450763, "loss": 0.4222, "step": 2230 }, { "epoch": 4.89, "learning_rate": 0.0001536105032822757, "loss": 0.3791, "step": 2240 }, { "epoch": 4.91, "learning_rate": 0.00015295404814004375, "loss": 0.4325, "step": 2250 }, { "epoch": 4.93, "learning_rate": 0.0001522975929978118, "loss": 0.4396, "step": 2260 }, { "epoch": 4.95, "learning_rate": 0.00015164113785557986, "loss": 0.3989, "step": 2270 }, { "epoch": 4.97, "learning_rate": 0.00015098468271334788, "loss": 0.4068, "step": 2280 }, { "epoch": 4.99, "learning_rate": 0.00015032822757111595, "loss": 0.4422, "step": 2290 }, { "epoch": 5.02, "learning_rate": 0.00014967177242888402, "loss": 0.3521, "step": 2300 }, { "epoch": 5.02, "eval_cer": 0.12168727915194347, "eval_loss": 0.48572927713394165, "eval_runtime": 76.3316, "eval_samples_per_second": 25.874, "eval_steps_per_second": 1.624, "eval_wer": 0.27823644565494937, "step": 2300 }, { "epoch": 5.04, "learning_rate": 0.00014901531728665207, "loss": 0.4338, "step": 2310 }, { "epoch": 5.06, "learning_rate": 0.00014835886214442014, "loss": 0.3637, "step": 2320 }, { "epoch": 5.08, "learning_rate": 0.00014770240700218818, "loss": 0.4171, "step": 2330 }, { "epoch": 5.1, "learning_rate": 0.00014704595185995622, "loss": 0.3593, "step": 2340 }, { "epoch": 5.13, "learning_rate": 0.00014638949671772427, "loss": 0.4326, "step": 2350 }, { "epoch": 5.15, "learning_rate": 0.00014573304157549234, "loss": 0.3774, "step": 2360 }, { "epoch": 5.17, "learning_rate": 0.00014507658643326038, "loss": 0.3695, "step": 2370 }, { "epoch": 5.19, "learning_rate": 0.00014442013129102843, "loss": 0.3842, "step": 2380 }, { "epoch": 5.21, "learning_rate": 0.00014376367614879647, "loss": 0.3443, "step": 2390 }, { "epoch": 5.23, "learning_rate": 0.00014310722100656454, "loss": 0.3976, "step": 2400 }, { "epoch": 5.23, "eval_cer": 0.12307420494699646, "eval_loss": 0.493564635515213, "eval_runtime": 76.5075, "eval_samples_per_second": 25.814, "eval_steps_per_second": 1.621, "eval_wer": 0.2801940590688569, "step": 2400 }, { "epoch": 5.26, "learning_rate": 0.00014245076586433258, "loss": 0.3759, "step": 2410 }, { "epoch": 5.28, "learning_rate": 0.00014179431072210066, "loss": 0.3789, "step": 2420 }, { "epoch": 5.3, "learning_rate": 0.0001411378555798687, "loss": 0.366, "step": 2430 }, { "epoch": 5.32, "learning_rate": 0.00014048140043763674, "loss": 0.3961, "step": 2440 }, { "epoch": 5.34, "learning_rate": 0.00013982494529540481, "loss": 0.3863, "step": 2450 }, { "epoch": 5.37, "learning_rate": 0.00013916849015317286, "loss": 0.3987, "step": 2460 }, { "epoch": 5.39, "learning_rate": 0.0001385120350109409, "loss": 0.3788, "step": 2470 }, { "epoch": 5.41, "learning_rate": 0.00013785557986870897, "loss": 0.3735, "step": 2480 }, { "epoch": 5.43, "learning_rate": 0.00013719912472647702, "loss": 0.377, "step": 2490 }, { "epoch": 5.45, "learning_rate": 0.00013654266958424506, "loss": 0.365, "step": 2500 }, { "epoch": 5.45, "eval_cer": 0.12206713780918728, "eval_loss": 0.49057817459106445, "eval_runtime": 76.5154, "eval_samples_per_second": 25.812, "eval_steps_per_second": 1.621, "eval_wer": 0.2774278662013788, "step": 2500 }, { "epoch": 5.47, "learning_rate": 0.0001358862144420131, "loss": 0.4061, "step": 2510 }, { "epoch": 5.5, "learning_rate": 0.00013522975929978117, "loss": 0.3679, "step": 2520 }, { "epoch": 5.52, "learning_rate": 0.00013457330415754922, "loss": 0.399, "step": 2530 }, { "epoch": 5.54, "learning_rate": 0.00013391684901531726, "loss": 0.3358, "step": 2540 }, { "epoch": 5.56, "learning_rate": 0.00013326039387308533, "loss": 0.3544, "step": 2550 }, { "epoch": 5.58, "learning_rate": 0.00013260393873085338, "loss": 0.3766, "step": 2560 }, { "epoch": 5.61, "learning_rate": 0.00013194748358862145, "loss": 0.3696, "step": 2570 }, { "epoch": 5.63, "learning_rate": 0.0001312910284463895, "loss": 0.3834, "step": 2580 }, { "epoch": 5.65, "learning_rate": 0.00013063457330415753, "loss": 0.3654, "step": 2590 }, { "epoch": 5.67, "learning_rate": 0.0001299781181619256, "loss": 0.3857, "step": 2600 }, { "epoch": 5.67, "eval_cer": 0.12015017667844523, "eval_loss": 0.48429372906684875, "eval_runtime": 76.9088, "eval_samples_per_second": 25.68, "eval_steps_per_second": 1.612, "eval_wer": 0.2757255936675462, "step": 2600 }, { "epoch": 5.69, "learning_rate": 0.00012932166301969365, "loss": 0.4039, "step": 2610 }, { "epoch": 5.71, "learning_rate": 0.0001286652078774617, "loss": 0.3752, "step": 2620 }, { "epoch": 5.74, "learning_rate": 0.00012800875273522974, "loss": 0.4239, "step": 2630 }, { "epoch": 5.76, "learning_rate": 0.00012735229759299778, "loss": 0.3803, "step": 2640 }, { "epoch": 5.78, "learning_rate": 0.00012669584245076585, "loss": 0.3371, "step": 2650 }, { "epoch": 5.8, "learning_rate": 0.0001260393873085339, "loss": 0.3713, "step": 2660 }, { "epoch": 5.82, "learning_rate": 0.00012538293216630197, "loss": 0.4023, "step": 2670 }, { "epoch": 5.85, "learning_rate": 0.00012472647702407, "loss": 0.3747, "step": 2680 }, { "epoch": 5.87, "learning_rate": 0.00012407002188183805, "loss": 0.3474, "step": 2690 }, { "epoch": 5.89, "learning_rate": 0.00012341356673960612, "loss": 0.3578, "step": 2700 }, { "epoch": 5.89, "eval_cer": 0.11956713780918728, "eval_loss": 0.4857370853424072, "eval_runtime": 77.8242, "eval_samples_per_second": 25.378, "eval_steps_per_second": 1.593, "eval_wer": 0.27083156013277726, "step": 2700 }, { "epoch": 5.91, "learning_rate": 0.00012275711159737417, "loss": 0.3594, "step": 2710 }, { "epoch": 5.93, "learning_rate": 0.00012210065645514224, "loss": 0.4503, "step": 2720 }, { "epoch": 5.95, "learning_rate": 0.00012144420131291027, "loss": 0.3817, "step": 2730 }, { "epoch": 5.98, "learning_rate": 0.00012078774617067834, "loss": 0.3685, "step": 2740 }, { "epoch": 6.0, "learning_rate": 0.00012013129102844638, "loss": 0.3637, "step": 2750 }, { "epoch": 6.02, "learning_rate": 0.00011947483588621443, "loss": 0.3677, "step": 2760 }, { "epoch": 6.04, "learning_rate": 0.00011881838074398248, "loss": 0.3523, "step": 2770 }, { "epoch": 6.06, "learning_rate": 0.00011816192560175053, "loss": 0.3554, "step": 2780 }, { "epoch": 6.09, "learning_rate": 0.0001175054704595186, "loss": 0.3441, "step": 2790 }, { "epoch": 6.11, "learning_rate": 0.00011684901531728664, "loss": 0.3298, "step": 2800 }, { "epoch": 6.11, "eval_cer": 0.11965547703180213, "eval_loss": 0.4866645634174347, "eval_runtime": 76.5936, "eval_samples_per_second": 25.785, "eval_steps_per_second": 1.619, "eval_wer": 0.2688739467188697, "step": 2800 }, { "epoch": 6.13, "learning_rate": 0.00011619256017505469, "loss": 0.3301, "step": 2810 }, { "epoch": 6.15, "learning_rate": 0.00011553610503282276, "loss": 0.3277, "step": 2820 }, { "epoch": 6.17, "learning_rate": 0.0001148796498905908, "loss": 0.3392, "step": 2830 }, { "epoch": 6.19, "learning_rate": 0.00011422319474835886, "loss": 0.342, "step": 2840 }, { "epoch": 6.22, "learning_rate": 0.0001135667396061269, "loss": 0.3534, "step": 2850 }, { "epoch": 6.24, "learning_rate": 0.00011291028446389495, "loss": 0.3009, "step": 2860 }, { "epoch": 6.26, "learning_rate": 0.00011225382932166302, "loss": 0.3287, "step": 2870 }, { "epoch": 6.28, "learning_rate": 0.00011159737417943106, "loss": 0.3427, "step": 2880 }, { "epoch": 6.3, "learning_rate": 0.00011094091903719912, "loss": 0.3959, "step": 2890 }, { "epoch": 6.32, "learning_rate": 0.00011028446389496716, "loss": 0.3099, "step": 2900 }, { "epoch": 6.32, "eval_cer": 0.12365724381625441, "eval_loss": 0.49239638447761536, "eval_runtime": 77.6773, "eval_samples_per_second": 25.426, "eval_steps_per_second": 1.596, "eval_wer": 0.27695974125457484, "step": 2900 }, { "epoch": 6.35, "learning_rate": 0.00010962800875273522, "loss": 0.364, "step": 2910 }, { "epoch": 6.37, "learning_rate": 0.00010897155361050328, "loss": 0.3946, "step": 2920 }, { "epoch": 6.39, "learning_rate": 0.00010831509846827132, "loss": 0.3134, "step": 2930 }, { "epoch": 6.41, "learning_rate": 0.00010765864332603939, "loss": 0.3433, "step": 2940 }, { "epoch": 6.43, "learning_rate": 0.00010700218818380743, "loss": 0.3756, "step": 2950 }, { "epoch": 6.46, "learning_rate": 0.00010634573304157548, "loss": 0.3307, "step": 2960 }, { "epoch": 6.48, "learning_rate": 0.00010568927789934354, "loss": 0.3647, "step": 2970 }, { "epoch": 6.5, "learning_rate": 0.00010503282275711158, "loss": 0.3617, "step": 2980 }, { "epoch": 6.52, "learning_rate": 0.00010437636761487965, "loss": 0.3632, "step": 2990 }, { "epoch": 6.54, "learning_rate": 0.0001037199124726477, "loss": 0.3606, "step": 3000 }, { "epoch": 6.54, "eval_cer": 0.11892226148409894, "eval_loss": 0.48514822125434875, "eval_runtime": 76.2961, "eval_samples_per_second": 25.886, "eval_steps_per_second": 1.625, "eval_wer": 0.26840582177206573, "step": 3000 }, { "epoch": 6.56, "learning_rate": 0.00010306345733041574, "loss": 0.326, "step": 3010 }, { "epoch": 6.59, "learning_rate": 0.0001024070021881838, "loss": 0.3282, "step": 3020 }, { "epoch": 6.61, "learning_rate": 0.00010175054704595185, "loss": 0.3225, "step": 3030 }, { "epoch": 6.63, "learning_rate": 0.00010109409190371991, "loss": 0.3726, "step": 3040 }, { "epoch": 6.65, "learning_rate": 0.00010043763676148795, "loss": 0.3231, "step": 3050 }, { "epoch": 6.67, "learning_rate": 9.9781181619256e-05, "loss": 0.3161, "step": 3060 }, { "epoch": 6.7, "learning_rate": 9.912472647702407e-05, "loss": 0.3354, "step": 3070 }, { "epoch": 6.72, "learning_rate": 9.846827133479211e-05, "loss": 0.3628, "step": 3080 }, { "epoch": 6.74, "learning_rate": 9.781181619256017e-05, "loss": 0.372, "step": 3090 }, { "epoch": 6.76, "learning_rate": 9.715536105032821e-05, "loss": 0.3807, "step": 3100 }, { "epoch": 6.76, "eval_cer": 0.11962897526501767, "eval_loss": 0.47002533078193665, "eval_runtime": 76.3383, "eval_samples_per_second": 25.872, "eval_steps_per_second": 1.624, "eval_wer": 0.2655970720912418, "step": 3100 }, { "epoch": 6.78, "learning_rate": 9.649890590809627e-05, "loss": 0.3163, "step": 3110 }, { "epoch": 6.8, "learning_rate": 9.584245076586433e-05, "loss": 0.3699, "step": 3120 }, { "epoch": 6.83, "learning_rate": 9.518599562363237e-05, "loss": 0.3421, "step": 3130 }, { "epoch": 6.85, "learning_rate": 9.452954048140043e-05, "loss": 0.2966, "step": 3140 }, { "epoch": 6.87, "learning_rate": 9.387308533916849e-05, "loss": 0.3197, "step": 3150 }, { "epoch": 6.89, "learning_rate": 9.321663019693654e-05, "loss": 0.325, "step": 3160 }, { "epoch": 6.91, "learning_rate": 9.256017505470459e-05, "loss": 0.3375, "step": 3170 }, { "epoch": 6.94, "learning_rate": 9.190371991247263e-05, "loss": 0.3241, "step": 3180 }, { "epoch": 6.96, "learning_rate": 9.12472647702407e-05, "loss": 0.2941, "step": 3190 }, { "epoch": 6.98, "learning_rate": 9.059080962800874e-05, "loss": 0.3286, "step": 3200 }, { "epoch": 6.98, "eval_cer": 0.12047703180212015, "eval_loss": 0.47703927755355835, "eval_runtime": 76.5362, "eval_samples_per_second": 25.805, "eval_steps_per_second": 1.62, "eval_wer": 0.2730445144267597, "step": 3200 }, { "epoch": 7.0, "learning_rate": 8.99343544857768e-05, "loss": 0.3453, "step": 3210 }, { "epoch": 7.02, "learning_rate": 8.927789934354485e-05, "loss": 0.3261, "step": 3220 }, { "epoch": 7.04, "learning_rate": 8.86214442013129e-05, "loss": 0.294, "step": 3230 }, { "epoch": 7.07, "learning_rate": 8.796498905908096e-05, "loss": 0.2559, "step": 3240 }, { "epoch": 7.09, "learning_rate": 8.7308533916849e-05, "loss": 0.3186, "step": 3250 }, { "epoch": 7.11, "learning_rate": 8.665207877461706e-05, "loss": 0.3612, "step": 3260 }, { "epoch": 7.13, "learning_rate": 8.599562363238512e-05, "loss": 0.3232, "step": 3270 }, { "epoch": 7.15, "learning_rate": 8.533916849015316e-05, "loss": 0.2976, "step": 3280 }, { "epoch": 7.18, "learning_rate": 8.468271334792122e-05, "loss": 0.3237, "step": 3290 }, { "epoch": 7.2, "learning_rate": 8.402625820568926e-05, "loss": 0.3318, "step": 3300 }, { "epoch": 7.2, "eval_cer": 0.11656360424028268, "eval_loss": 0.4844971299171448, "eval_runtime": 76.5793, "eval_samples_per_second": 25.79, "eval_steps_per_second": 1.619, "eval_wer": 0.25785173206230316, "step": 3300 }, { "epoch": 7.22, "learning_rate": 8.336980306345733e-05, "loss": 0.3216, "step": 3310 }, { "epoch": 7.24, "learning_rate": 8.271334792122538e-05, "loss": 0.2986, "step": 3320 }, { "epoch": 7.26, "learning_rate": 8.205689277899342e-05, "loss": 0.3111, "step": 3330 }, { "epoch": 7.28, "learning_rate": 8.140043763676148e-05, "loss": 0.3067, "step": 3340 }, { "epoch": 7.31, "learning_rate": 8.074398249452954e-05, "loss": 0.2697, "step": 3350 }, { "epoch": 7.33, "learning_rate": 8.00875273522976e-05, "loss": 0.2998, "step": 3360 }, { "epoch": 7.35, "learning_rate": 7.943107221006564e-05, "loss": 0.3187, "step": 3370 }, { "epoch": 7.37, "learning_rate": 7.877461706783368e-05, "loss": 0.3361, "step": 3380 }, { "epoch": 7.39, "learning_rate": 7.811816192560175e-05, "loss": 0.2848, "step": 3390 }, { "epoch": 7.42, "learning_rate": 7.74617067833698e-05, "loss": 0.2936, "step": 3400 }, { "epoch": 7.42, "eval_cer": 0.11591872791519435, "eval_loss": 0.49088254570961, "eval_runtime": 76.7751, "eval_samples_per_second": 25.724, "eval_steps_per_second": 1.615, "eval_wer": 0.25700059579538687, "step": 3400 }, { "epoch": 7.44, "learning_rate": 7.680525164113785e-05, "loss": 0.2901, "step": 3410 }, { "epoch": 7.46, "learning_rate": 7.61487964989059e-05, "loss": 0.2998, "step": 3420 }, { "epoch": 7.48, "learning_rate": 7.549234135667394e-05, "loss": 0.2743, "step": 3430 }, { "epoch": 7.5, "learning_rate": 7.483588621444201e-05, "loss": 0.3297, "step": 3440 }, { "epoch": 7.52, "learning_rate": 7.417943107221007e-05, "loss": 0.2912, "step": 3450 }, { "epoch": 7.55, "learning_rate": 7.352297592997811e-05, "loss": 0.2884, "step": 3460 }, { "epoch": 7.57, "learning_rate": 7.286652078774617e-05, "loss": 0.2948, "step": 3470 }, { "epoch": 7.59, "learning_rate": 7.221006564551421e-05, "loss": 0.2908, "step": 3480 }, { "epoch": 7.61, "learning_rate": 7.155361050328227e-05, "loss": 0.3031, "step": 3490 }, { "epoch": 7.63, "learning_rate": 7.089715536105033e-05, "loss": 0.3119, "step": 3500 }, { "epoch": 7.63, "eval_cer": 0.11498233215547703, "eval_loss": 0.4898751676082611, "eval_runtime": 77.9528, "eval_samples_per_second": 25.336, "eval_steps_per_second": 1.591, "eval_wer": 0.253936505234488, "step": 3500 }, { "epoch": 7.66, "learning_rate": 7.024070021881837e-05, "loss": 0.2985, "step": 3510 }, { "epoch": 7.68, "learning_rate": 6.958424507658643e-05, "loss": 0.2669, "step": 3520 }, { "epoch": 7.7, "learning_rate": 6.892778993435449e-05, "loss": 0.3468, "step": 3530 }, { "epoch": 7.72, "learning_rate": 6.827133479212253e-05, "loss": 0.2897, "step": 3540 }, { "epoch": 7.74, "learning_rate": 6.761487964989059e-05, "loss": 0.301, "step": 3550 }, { "epoch": 7.76, "learning_rate": 6.695842450765863e-05, "loss": 0.2838, "step": 3560 }, { "epoch": 7.79, "learning_rate": 6.630196936542669e-05, "loss": 0.332, "step": 3570 }, { "epoch": 7.81, "learning_rate": 6.564551422319475e-05, "loss": 0.3232, "step": 3580 }, { "epoch": 7.83, "learning_rate": 6.49890590809628e-05, "loss": 0.3118, "step": 3590 }, { "epoch": 7.85, "learning_rate": 6.433260393873085e-05, "loss": 0.3142, "step": 3600 }, { "epoch": 7.85, "eval_cer": 0.11430212014134275, "eval_loss": 0.4782133996486664, "eval_runtime": 76.4776, "eval_samples_per_second": 25.825, "eval_steps_per_second": 1.621, "eval_wer": 0.25495786875478765, "step": 3600 }, { "epoch": 7.87, "learning_rate": 6.367614879649889e-05, "loss": 0.3406, "step": 3610 }, { "epoch": 7.9, "learning_rate": 6.301969365426695e-05, "loss": 0.3147, "step": 3620 }, { "epoch": 7.92, "learning_rate": 6.2363238512035e-05, "loss": 0.3376, "step": 3630 }, { "epoch": 7.94, "learning_rate": 6.170678336980306e-05, "loss": 0.3083, "step": 3640 }, { "epoch": 7.96, "learning_rate": 6.105032822757112e-05, "loss": 0.2793, "step": 3650 }, { "epoch": 7.98, "learning_rate": 6.039387308533917e-05, "loss": 0.3022, "step": 3660 }, { "epoch": 8.0, "learning_rate": 5.9737417943107214e-05, "loss": 0.2657, "step": 3670 }, { "epoch": 8.03, "learning_rate": 5.9080962800875264e-05, "loss": 0.2794, "step": 3680 }, { "epoch": 8.05, "learning_rate": 5.842450765864332e-05, "loss": 0.2583, "step": 3690 }, { "epoch": 8.07, "learning_rate": 5.776805251641138e-05, "loss": 0.2935, "step": 3700 }, { "epoch": 8.07, "eval_cer": 0.11532685512367491, "eval_loss": 0.4885237514972687, "eval_runtime": 77.8857, "eval_samples_per_second": 25.358, "eval_steps_per_second": 1.592, "eval_wer": 0.25270235764745935, "step": 3700 }, { "epoch": 8.09, "learning_rate": 5.711159737417943e-05, "loss": 0.2726, "step": 3710 }, { "epoch": 8.11, "learning_rate": 5.645514223194747e-05, "loss": 0.2831, "step": 3720 }, { "epoch": 8.14, "learning_rate": 5.579868708971553e-05, "loss": 0.2656, "step": 3730 }, { "epoch": 8.16, "learning_rate": 5.514223194748358e-05, "loss": 0.2792, "step": 3740 }, { "epoch": 8.18, "learning_rate": 5.448577680525164e-05, "loss": 0.268, "step": 3750 }, { "epoch": 8.2, "learning_rate": 5.3829321663019695e-05, "loss": 0.2779, "step": 3760 }, { "epoch": 8.22, "learning_rate": 5.317286652078774e-05, "loss": 0.2782, "step": 3770 }, { "epoch": 8.24, "learning_rate": 5.251641137855579e-05, "loss": 0.3074, "step": 3780 }, { "epoch": 8.27, "learning_rate": 5.185995623632385e-05, "loss": 0.262, "step": 3790 }, { "epoch": 8.29, "learning_rate": 5.12035010940919e-05, "loss": 0.2805, "step": 3800 }, { "epoch": 8.29, "eval_cer": 0.11428445229681979, "eval_loss": 0.49059855937957764, "eval_runtime": 76.8475, "eval_samples_per_second": 25.7, "eval_steps_per_second": 1.614, "eval_wer": 0.25287258490084263, "step": 3800 }, { "epoch": 8.31, "learning_rate": 5.0547045951859955e-05, "loss": 0.2733, "step": 3810 }, { "epoch": 8.33, "learning_rate": 4.9890590809628e-05, "loss": 0.2947, "step": 3820 }, { "epoch": 8.35, "learning_rate": 4.9234135667396056e-05, "loss": 0.2963, "step": 3830 }, { "epoch": 8.38, "learning_rate": 4.8577680525164106e-05, "loss": 0.286, "step": 3840 }, { "epoch": 8.4, "learning_rate": 4.7921225382932163e-05, "loss": 0.2971, "step": 3850 }, { "epoch": 8.42, "learning_rate": 4.7264770240700214e-05, "loss": 0.2492, "step": 3860 }, { "epoch": 8.44, "learning_rate": 4.660831509846827e-05, "loss": 0.2564, "step": 3870 }, { "epoch": 8.46, "learning_rate": 4.5951859956236315e-05, "loss": 0.2787, "step": 3880 }, { "epoch": 8.48, "learning_rate": 4.529540481400437e-05, "loss": 0.2799, "step": 3890 }, { "epoch": 8.51, "learning_rate": 4.463894967177242e-05, "loss": 0.254, "step": 3900 }, { "epoch": 8.51, "eval_cer": 0.1143904593639576, "eval_loss": 0.48224031925201416, "eval_runtime": 76.6309, "eval_samples_per_second": 25.773, "eval_steps_per_second": 1.618, "eval_wer": 0.2537662779811048, "step": 3900 }, { "epoch": 8.53, "learning_rate": 4.398249452954048e-05, "loss": 0.2835, "step": 3910 }, { "epoch": 8.55, "learning_rate": 4.332603938730853e-05, "loss": 0.3104, "step": 3920 }, { "epoch": 8.57, "learning_rate": 4.266958424507658e-05, "loss": 0.2795, "step": 3930 }, { "epoch": 8.59, "learning_rate": 4.201312910284463e-05, "loss": 0.2655, "step": 3940 }, { "epoch": 8.62, "learning_rate": 4.135667396061269e-05, "loss": 0.2652, "step": 3950 }, { "epoch": 8.64, "learning_rate": 4.070021881838074e-05, "loss": 0.2679, "step": 3960 }, { "epoch": 8.66, "learning_rate": 4.00437636761488e-05, "loss": 0.2776, "step": 3970 }, { "epoch": 8.68, "learning_rate": 3.938730853391684e-05, "loss": 0.3042, "step": 3980 }, { "epoch": 8.7, "learning_rate": 3.87308533916849e-05, "loss": 0.2951, "step": 3990 }, { "epoch": 8.72, "learning_rate": 3.807439824945295e-05, "loss": 0.2855, "step": 4000 }, { "epoch": 8.72, "eval_cer": 0.11234098939929328, "eval_loss": 0.4852147400379181, "eval_runtime": 77.8636, "eval_samples_per_second": 25.365, "eval_steps_per_second": 1.593, "eval_wer": 0.24755298323261554, "step": 4000 }, { "epoch": 8.75, "learning_rate": 3.7417943107221006e-05, "loss": 0.279, "step": 4010 }, { "epoch": 8.77, "learning_rate": 3.6761487964989056e-05, "loss": 0.2916, "step": 4020 }, { "epoch": 8.79, "learning_rate": 3.6105032822757107e-05, "loss": 0.3098, "step": 4030 }, { "epoch": 8.81, "learning_rate": 3.5448577680525164e-05, "loss": 0.2907, "step": 4040 }, { "epoch": 8.83, "learning_rate": 3.4792122538293214e-05, "loss": 0.2496, "step": 4050 }, { "epoch": 8.85, "learning_rate": 3.4135667396061265e-05, "loss": 0.2618, "step": 4060 }, { "epoch": 8.88, "learning_rate": 3.3479212253829315e-05, "loss": 0.2516, "step": 4070 }, { "epoch": 8.9, "learning_rate": 3.282275711159737e-05, "loss": 0.2721, "step": 4080 }, { "epoch": 8.92, "learning_rate": 3.216630196936542e-05, "loss": 0.3041, "step": 4090 }, { "epoch": 8.94, "learning_rate": 3.1509846827133474e-05, "loss": 0.2661, "step": 4100 }, { "epoch": 8.94, "eval_cer": 0.11321554770318021, "eval_loss": 0.4847288429737091, "eval_runtime": 76.8119, "eval_samples_per_second": 25.712, "eval_steps_per_second": 1.614, "eval_wer": 0.24963826708656056, "step": 4100 }, { "epoch": 8.96, "learning_rate": 3.085339168490153e-05, "loss": 0.2588, "step": 4110 }, { "epoch": 8.99, "learning_rate": 3.0196936542669585e-05, "loss": 0.2807, "step": 4120 }, { "epoch": 9.01, "learning_rate": 2.9540481400437632e-05, "loss": 0.2572, "step": 4130 }, { "epoch": 9.03, "learning_rate": 2.888402625820569e-05, "loss": 0.2789, "step": 4140 }, { "epoch": 9.05, "learning_rate": 2.8227571115973736e-05, "loss": 0.234, "step": 4150 }, { "epoch": 9.07, "learning_rate": 2.757111597374179e-05, "loss": 0.2443, "step": 4160 }, { "epoch": 9.09, "learning_rate": 2.6914660831509848e-05, "loss": 0.2936, "step": 4170 }, { "epoch": 9.12, "learning_rate": 2.6258205689277895e-05, "loss": 0.2776, "step": 4180 }, { "epoch": 9.14, "learning_rate": 2.560175054704595e-05, "loss": 0.2588, "step": 4190 }, { "epoch": 9.16, "learning_rate": 2.4945295404814e-05, "loss": 0.2524, "step": 4200 }, { "epoch": 9.16, "eval_cer": 0.11163427561837456, "eval_loss": 0.4900279939174652, "eval_runtime": 77.608, "eval_samples_per_second": 25.448, "eval_steps_per_second": 1.598, "eval_wer": 0.24419099497829602, "step": 4200 }, { "epoch": 9.18, "learning_rate": 2.4288840262582053e-05, "loss": 0.275, "step": 4210 }, { "epoch": 9.2, "learning_rate": 2.3632385120350107e-05, "loss": 0.2859, "step": 4220 }, { "epoch": 9.23, "learning_rate": 2.2975929978118158e-05, "loss": 0.2422, "step": 4230 }, { "epoch": 9.25, "learning_rate": 2.231947483588621e-05, "loss": 0.2835, "step": 4240 }, { "epoch": 9.27, "learning_rate": 2.1663019693654265e-05, "loss": 0.2505, "step": 4250 }, { "epoch": 9.29, "learning_rate": 2.1006564551422316e-05, "loss": 0.2413, "step": 4260 }, { "epoch": 9.31, "learning_rate": 2.035010940919037e-05, "loss": 0.2737, "step": 4270 }, { "epoch": 9.33, "learning_rate": 1.969365426695842e-05, "loss": 0.2544, "step": 4280 }, { "epoch": 9.36, "learning_rate": 1.9037199124726474e-05, "loss": 0.258, "step": 4290 }, { "epoch": 9.38, "learning_rate": 1.8380743982494528e-05, "loss": 0.253, "step": 4300 }, { "epoch": 9.38, "eval_cer": 0.11195229681978798, "eval_loss": 0.4887804090976715, "eval_runtime": 76.736, "eval_samples_per_second": 25.738, "eval_steps_per_second": 1.616, "eval_wer": 0.24580815388543706, "step": 4300 }, { "epoch": 9.4, "learning_rate": 1.7724288840262582e-05, "loss": 0.2478, "step": 4310 }, { "epoch": 9.42, "learning_rate": 1.7067833698030632e-05, "loss": 0.243, "step": 4320 }, { "epoch": 9.44, "learning_rate": 1.6411378555798686e-05, "loss": 0.2583, "step": 4330 }, { "epoch": 9.47, "learning_rate": 1.5754923413566737e-05, "loss": 0.265, "step": 4340 }, { "epoch": 9.49, "learning_rate": 1.5098468271334792e-05, "loss": 0.247, "step": 4350 }, { "epoch": 9.51, "learning_rate": 1.4442013129102845e-05, "loss": 0.2825, "step": 4360 }, { "epoch": 9.53, "learning_rate": 1.3785557986870895e-05, "loss": 0.2637, "step": 4370 }, { "epoch": 9.55, "learning_rate": 1.3129102844638947e-05, "loss": 0.251, "step": 4380 }, { "epoch": 9.57, "learning_rate": 1.2472647702407e-05, "loss": 0.2831, "step": 4390 }, { "epoch": 9.6, "learning_rate": 1.1816192560175053e-05, "loss": 0.2591, "step": 4400 }, { "epoch": 9.6, "eval_cer": 0.11252650176678446, "eval_loss": 0.48128968477249146, "eval_runtime": 78.3767, "eval_samples_per_second": 25.199, "eval_steps_per_second": 1.582, "eval_wer": 0.24576559707209125, "step": 4400 } ], "logging_steps": 10, "max_steps": 4580, "num_train_epochs": 10, "save_steps": 100, "total_flos": 1.5294484577356087e+19, "trial_name": null, "trial_params": null }