|
{
  "best_metric": 34.549659010739205,
  "best_model_checkpoint": "./whisper-small-hi/checkpoint-4000",
  "epoch": 2.546148949713558,
  "eval_steps": 1000,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.015913430935709738,
      "grad_norm": 6.266472339630127,
      "learning_rate": 1.44e-05,
      "loss": 1.7883,
      "step": 25
    },
    {
      "epoch": 0.031826861871419476,
      "grad_norm": 11.293601989746094,
      "learning_rate": 2.94e-05,
      "loss": 1.3114,
      "step": 50
    },
    {
      "epoch": 0.047740292807129214,
      "grad_norm": 11.253959655761719,
      "learning_rate": 4.4399999999999995e-05,
      "loss": 0.7712,
      "step": 75
    },
    {
      "epoch": 0.06365372374283895,
      "grad_norm": 6.763392448425293,
      "learning_rate": 5.94e-05,
      "loss": 0.4958,
      "step": 100
    },
    {
      "epoch": 0.07956715467854869,
      "grad_norm": 7.226134300231934,
      "learning_rate": 7.439999999999999e-05,
      "loss": 0.4231,
      "step": 125
    },
    {
      "epoch": 0.09548058561425843,
      "grad_norm": 8.527499198913574,
      "learning_rate": 8.939999999999999e-05,
      "loss": 0.3815,
      "step": 150
    },
    {
      "epoch": 0.11139401654996817,
      "grad_norm": 6.013850212097168,
      "learning_rate": 0.00010439999999999999,
      "loss": 0.3633,
      "step": 175
    },
    {
      "epoch": 0.1273074474856779,
      "grad_norm": 5.921957015991211,
      "learning_rate": 0.0001194,
      "loss": 0.3564,
      "step": 200
    },
    {
      "epoch": 0.14322087842138764,
      "grad_norm": 6.438776969909668,
      "learning_rate": 0.0001344,
      "loss": 0.3549,
      "step": 225
    },
    {
      "epoch": 0.15913430935709738,
      "grad_norm": 6.543764114379883,
      "learning_rate": 0.0001494,
      "loss": 0.3298,
      "step": 250
    },
    {
      "epoch": 0.17504774029280712,
      "grad_norm": 5.377121448516846,
      "learning_rate": 0.0001644,
      "loss": 0.3251,
      "step": 275
    },
    {
      "epoch": 0.19096117122851686,
      "grad_norm": 6.325755596160889,
      "learning_rate": 0.00017939999999999997,
      "loss": 0.3319,
      "step": 300
    },
    {
      "epoch": 0.2068746021642266,
      "grad_norm": 3.889002561569214,
      "learning_rate": 0.00019439999999999998,
      "loss": 0.3311,
      "step": 325
    },
    {
      "epoch": 0.22278803309993633,
      "grad_norm": 4.005517482757568,
      "learning_rate": 0.00020939999999999997,
      "loss": 0.3388,
      "step": 350
    },
    {
      "epoch": 0.23870146403564607,
      "grad_norm": 3.7465219497680664,
      "learning_rate": 0.00022439999999999998,
      "loss": 0.3304,
      "step": 375
    },
    {
      "epoch": 0.2546148949713558,
      "grad_norm": 3.612300157546997,
      "learning_rate": 0.0002394,
      "loss": 0.3253,
      "step": 400
    },
    {
      "epoch": 0.27052832590706555,
      "grad_norm": 2.8564534187316895,
      "learning_rate": 0.00025439999999999995,
      "loss": 0.325,
      "step": 425
    },
    {
      "epoch": 0.2864417568427753,
      "grad_norm": 3.6657044887542725,
      "learning_rate": 0.0002694,
      "loss": 0.3362,
      "step": 450
    },
    {
      "epoch": 0.302355187778485,
      "grad_norm": 6.07236385345459,
      "learning_rate": 0.0002844,
      "loss": 0.3396,
      "step": 475
    },
    {
      "epoch": 0.31826861871419476,
      "grad_norm": 3.5293257236480713,
      "learning_rate": 0.00029939999999999996,
      "loss": 0.3265,
      "step": 500
    },
    {
      "epoch": 0.3341820496499045,
      "grad_norm": 2.7845981121063232,
      "learning_rate": 0.0002979428571428571,
      "loss": 0.3066,
      "step": 525
    },
    {
      "epoch": 0.35009548058561424,
      "grad_norm": 3.957970380783081,
      "learning_rate": 0.0002958,
      "loss": 0.3076,
      "step": 550
    },
    {
      "epoch": 0.366008911521324,
      "grad_norm": 2.7481985092163086,
      "learning_rate": 0.00029365714285714285,
      "loss": 0.3181,
      "step": 575
    },
    {
      "epoch": 0.3819223424570337,
      "grad_norm": 5.5784912109375,
      "learning_rate": 0.0002915142857142857,
      "loss": 0.325,
      "step": 600
    },
    {
      "epoch": 0.39783577339274345,
      "grad_norm": 2.688514232635498,
      "learning_rate": 0.0002893714285714285,
      "loss": 0.2907,
      "step": 625
    },
    {
      "epoch": 0.4137492043284532,
      "grad_norm": 1.8559094667434692,
      "learning_rate": 0.0002872285714285714,
      "loss": 0.2837,
      "step": 650
    },
    {
      "epoch": 0.42966263526416293,
      "grad_norm": 2.075263023376465,
      "learning_rate": 0.00028508571428571426,
      "loss": 0.2884,
      "step": 675
    },
    {
      "epoch": 0.44557606619987267,
      "grad_norm": 2.253748893737793,
      "learning_rate": 0.00028294285714285713,
      "loss": 0.2652,
      "step": 700
    },
    {
      "epoch": 0.4614894971355824,
      "grad_norm": 2.5322704315185547,
      "learning_rate": 0.0002808,
      "loss": 0.2763,
      "step": 725
    },
    {
      "epoch": 0.47740292807129214,
      "grad_norm": 2.8500661849975586,
      "learning_rate": 0.0002786571428571428,
      "loss": 0.2698,
      "step": 750
    },
    {
      "epoch": 0.49331635900700194,
      "grad_norm": 2.4770638942718506,
      "learning_rate": 0.0002765142857142857,
      "loss": 0.2643,
      "step": 775
    },
    {
      "epoch": 0.5092297899427116,
      "grad_norm": 2.8346035480499268,
      "learning_rate": 0.00027437142857142854,
      "loss": 0.2682,
      "step": 800
    },
    {
      "epoch": 0.5251432208784214,
      "grad_norm": 2.551896095275879,
      "learning_rate": 0.0002722285714285714,
      "loss": 0.2484,
      "step": 825
    },
    {
      "epoch": 0.5410566518141311,
      "grad_norm": 2.1376454830169678,
      "learning_rate": 0.0002700857142857143,
      "loss": 0.2433,
      "step": 850
    },
    {
      "epoch": 0.5569700827498408,
      "grad_norm": 2.4351694583892822,
      "learning_rate": 0.0002679428571428571,
      "loss": 0.248,
      "step": 875
    },
    {
      "epoch": 0.5728835136855506,
      "grad_norm": 2.142422914505005,
      "learning_rate": 0.00026579999999999996,
      "loss": 0.2592,
      "step": 900
    },
    {
      "epoch": 0.5887969446212603,
      "grad_norm": 2.287863254547119,
      "learning_rate": 0.0002636571428571428,
      "loss": 0.2361,
      "step": 925
    },
    {
      "epoch": 0.60471037555697,
      "grad_norm": 1.8011589050292969,
      "learning_rate": 0.0002615142857142857,
      "loss": 0.2349,
      "step": 950
    },
    {
      "epoch": 0.6206238064926798,
      "grad_norm": 2.0155577659606934,
      "learning_rate": 0.00025937142857142856,
      "loss": 0.2343,
      "step": 975
    },
    {
      "epoch": 0.6365372374283895,
      "grad_norm": 1.7602595090866089,
      "learning_rate": 0.0002572285714285714,
      "loss": 0.2369,
      "step": 1000
    },
    {
      "epoch": 0.6365372374283895,
      "eval_loss": 0.24331499636173248,
      "eval_runtime": 3161.0794,
      "eval_samples_per_second": 2.642,
      "eval_steps_per_second": 0.331,
      "eval_wer": 62.18807974706697,
      "step": 1000
    },
    {
      "epoch": 0.6524506683640993,
      "grad_norm": 2.084760904312134,
      "learning_rate": 0.0002550857142857143,
      "loss": 0.2352,
      "step": 1025
    },
    {
      "epoch": 0.668364099299809,
      "grad_norm": 1.6412831544876099,
      "learning_rate": 0.00025294285714285716,
      "loss": 0.2242,
      "step": 1050
    },
    {
      "epoch": 0.6842775302355187,
      "grad_norm": 1.3746275901794434,
      "learning_rate": 0.00025079999999999997,
      "loss": 0.2206,
      "step": 1075
    },
    {
      "epoch": 0.7001909611712285,
      "grad_norm": 2.017609119415283,
      "learning_rate": 0.00024865714285714284,
      "loss": 0.2223,
      "step": 1100
    },
    {
      "epoch": 0.7161043921069382,
      "grad_norm": 2.0669291019439697,
      "learning_rate": 0.0002465142857142857,
      "loss": 0.2233,
      "step": 1125
    },
    {
      "epoch": 0.732017823042648,
      "grad_norm": 1.699652075767517,
      "learning_rate": 0.00024437142857142857,
      "loss": 0.1984,
      "step": 1150
    },
    {
      "epoch": 0.7479312539783577,
      "grad_norm": 1.5082184076309204,
      "learning_rate": 0.00024222857142857138,
      "loss": 0.2143,
      "step": 1175
    },
    {
      "epoch": 0.7638446849140674,
      "grad_norm": 1.363891363143921,
      "learning_rate": 0.00024008571428571425,
      "loss": 0.2067,
      "step": 1200
    },
    {
      "epoch": 0.7797581158497772,
      "grad_norm": 1.3396174907684326,
      "learning_rate": 0.00023794285714285712,
      "loss": 0.2083,
      "step": 1225
    },
    {
      "epoch": 0.7956715467854869,
      "grad_norm": 1.6803642511367798,
      "learning_rate": 0.00023579999999999999,
      "loss": 0.2072,
      "step": 1250
    },
    {
      "epoch": 0.8115849777211966,
      "grad_norm": 1.8154999017715454,
      "learning_rate": 0.00023365714285714283,
      "loss": 0.2137,
      "step": 1275
    },
    {
      "epoch": 0.8274984086569064,
      "grad_norm": 1.9345273971557617,
      "learning_rate": 0.0002315142857142857,
      "loss": 0.1854,
      "step": 1300
    },
    {
      "epoch": 0.8434118395926161,
      "grad_norm": 1.8204659223556519,
      "learning_rate": 0.00022937142857142856,
      "loss": 0.1944,
      "step": 1325
    },
    {
      "epoch": 0.8593252705283259,
      "grad_norm": 1.7036362886428833,
      "learning_rate": 0.00022722857142857143,
      "loss": 0.1925,
      "step": 1350
    },
    {
      "epoch": 0.8752387014640356,
      "grad_norm": 1.5732781887054443,
      "learning_rate": 0.0002250857142857143,
      "loss": 0.2006,
      "step": 1375
    },
    {
      "epoch": 0.8911521323997453,
      "grad_norm": 1.4482449293136597,
      "learning_rate": 0.0002229428571428571,
      "loss": 0.1814,
      "step": 1400
    },
    {
      "epoch": 0.9070655633354551,
      "grad_norm": 1.474438190460205,
      "learning_rate": 0.00022079999999999997,
      "loss": 0.1876,
      "step": 1425
    },
    {
      "epoch": 0.9229789942711648,
      "grad_norm": 1.6966629028320312,
      "learning_rate": 0.00021865714285714284,
      "loss": 0.186,
      "step": 1450
    },
    {
      "epoch": 0.9388924252068745,
      "grad_norm": 1.6632884740829468,
      "learning_rate": 0.0002165142857142857,
      "loss": 0.1846,
      "step": 1475
    },
    {
      "epoch": 0.9548058561425843,
      "grad_norm": 1.4217106103897095,
      "learning_rate": 0.00021437142857142855,
      "loss": 0.1714,
      "step": 1500
    },
    {
      "epoch": 0.9707192870782941,
      "grad_norm": 1.280887484550476,
      "learning_rate": 0.00021222857142857141,
      "loss": 0.1794,
      "step": 1525
    },
    {
      "epoch": 0.9866327180140039,
      "grad_norm": 1.7453186511993408,
      "learning_rate": 0.00021008571428571428,
      "loss": 0.1788,
      "step": 1550
    },
    {
      "epoch": 1.0025461489497136,
      "grad_norm": 1.3471609354019165,
      "learning_rate": 0.00020794285714285712,
      "loss": 0.1631,
      "step": 1575
    },
    {
      "epoch": 1.0184595798854232,
      "grad_norm": 1.6721386909484863,
      "learning_rate": 0.0002058,
      "loss": 0.1388,
      "step": 1600
    },
    {
      "epoch": 1.034373010821133,
      "grad_norm": 1.3706496953964233,
      "learning_rate": 0.00020365714285714283,
      "loss": 0.139,
      "step": 1625
    },
    {
      "epoch": 1.0502864417568427,
      "grad_norm": 1.8108116388320923,
      "learning_rate": 0.0002015142857142857,
      "loss": 0.1486,
      "step": 1650
    },
    {
      "epoch": 1.0661998726925526,
      "grad_norm": 1.5080534219741821,
      "learning_rate": 0.00019937142857142856,
      "loss": 0.1431,
      "step": 1675
    },
    {
      "epoch": 1.0821133036282622,
      "grad_norm": 1.377472996711731,
      "learning_rate": 0.00019722857142857143,
      "loss": 0.1493,
      "step": 1700
    },
    {
      "epoch": 1.098026734563972,
      "grad_norm": 1.4746475219726562,
      "learning_rate": 0.00019508571428571427,
      "loss": 0.1386,
      "step": 1725
    },
    {
      "epoch": 1.1139401654996817,
      "grad_norm": 1.4167110919952393,
      "learning_rate": 0.0001929428571428571,
      "loss": 0.144,
      "step": 1750
    },
    {
      "epoch": 1.1298535964353915,
      "grad_norm": 1.4052375555038452,
      "learning_rate": 0.00019079999999999998,
      "loss": 0.1479,
      "step": 1775
    },
    {
      "epoch": 1.1457670273711011,
      "grad_norm": 1.1903959512710571,
      "learning_rate": 0.00018865714285714284,
      "loss": 0.1319,
      "step": 1800
    },
    {
      "epoch": 1.161680458306811,
      "grad_norm": 1.5119489431381226,
      "learning_rate": 0.00018651428571428568,
      "loss": 0.1408,
      "step": 1825
    },
    {
      "epoch": 1.1775938892425206,
      "grad_norm": 1.2824383974075317,
      "learning_rate": 0.00018437142857142855,
      "loss": 0.1379,
      "step": 1850
    },
    {
      "epoch": 1.1935073201782305,
      "grad_norm": 2.1692168712615967,
      "learning_rate": 0.00018222857142857142,
      "loss": 0.1411,
      "step": 1875
    },
    {
      "epoch": 1.20942075111394,
      "grad_norm": 1.2325959205627441,
      "learning_rate": 0.00018008571428571428,
      "loss": 0.1333,
      "step": 1900
    },
    {
      "epoch": 1.22533418204965,
      "grad_norm": 0.9051541090011597,
      "learning_rate": 0.00017794285714285715,
      "loss": 0.1523,
      "step": 1925
    },
    {
      "epoch": 1.2412476129853596,
      "grad_norm": 1.3505010604858398,
      "learning_rate": 0.00017579999999999996,
      "loss": 0.1363,
      "step": 1950
    },
    {
      "epoch": 1.2571610439210694,
      "grad_norm": 1.3619211912155151,
      "learning_rate": 0.00017365714285714283,
      "loss": 0.1308,
      "step": 1975
    },
    {
      "epoch": 1.273074474856779,
      "grad_norm": 1.2368829250335693,
      "learning_rate": 0.0001715142857142857,
      "loss": 0.1242,
      "step": 2000
    },
    {
      "epoch": 1.273074474856779,
      "eval_loss": 0.17340172827243805,
      "eval_runtime": 3339.9389,
      "eval_samples_per_second": 2.501,
      "eval_steps_per_second": 0.313,
      "eval_wer": 49.436910454391054,
      "step": 2000
    },
    {
      "epoch": 1.288987905792489,
      "grad_norm": 1.3111966848373413,
      "learning_rate": 0.00016937142857142856,
      "loss": 0.1318,
      "step": 2025
    },
    {
      "epoch": 1.3049013367281985,
      "grad_norm": 1.3677690029144287,
      "learning_rate": 0.0001672285714285714,
      "loss": 0.1368,
      "step": 2050
    },
    {
      "epoch": 1.3208147676639084,
      "grad_norm": 1.1350477933883667,
      "learning_rate": 0.00016508571428571427,
      "loss": 0.1218,
      "step": 2075
    },
    {
      "epoch": 1.336728198599618,
      "grad_norm": 1.4157230854034424,
      "learning_rate": 0.00016294285714285714,
      "loss": 0.1245,
      "step": 2100
    },
    {
      "epoch": 1.3526416295353278,
      "grad_norm": 1.6313213109970093,
      "learning_rate": 0.0001608,
      "loss": 0.1249,
      "step": 2125
    },
    {
      "epoch": 1.3685550604710375,
      "grad_norm": 1.1400282382965088,
      "learning_rate": 0.00015865714285714282,
      "loss": 0.1258,
      "step": 2150
    },
    {
      "epoch": 1.3844684914067473,
      "grad_norm": 1.0388585329055786,
      "learning_rate": 0.00015651428571428569,
      "loss": 0.1278,
      "step": 2175
    },
    {
      "epoch": 1.400381922342457,
      "grad_norm": 1.1972434520721436,
      "learning_rate": 0.00015437142857142855,
      "loss": 0.1294,
      "step": 2200
    },
    {
      "epoch": 1.4162953532781668,
      "grad_norm": 1.5634195804595947,
      "learning_rate": 0.00015222857142857142,
      "loss": 0.122,
      "step": 2225
    },
    {
      "epoch": 1.4322087842138764,
      "grad_norm": 1.0193285942077637,
      "learning_rate": 0.00015008571428571429,
      "loss": 0.116,
      "step": 2250
    },
    {
      "epoch": 1.4481222151495863,
      "grad_norm": 1.2780238389968872,
      "learning_rate": 0.00014794285714285713,
      "loss": 0.124,
      "step": 2275
    },
    {
      "epoch": 1.464035646085296,
      "grad_norm": 1.1720143556594849,
      "learning_rate": 0.0001458,
      "loss": 0.1267,
      "step": 2300
    },
    {
      "epoch": 1.4799490770210058,
      "grad_norm": 1.1181762218475342,
      "learning_rate": 0.00014365714285714286,
      "loss": 0.1131,
      "step": 2325
    },
    {
      "epoch": 1.4958625079567156,
      "grad_norm": 1.2259148359298706,
      "learning_rate": 0.0001415142857142857,
      "loss": 0.1121,
      "step": 2350
    },
    {
      "epoch": 1.5117759388924252,
      "grad_norm": 1.2877577543258667,
      "learning_rate": 0.00013937142857142857,
      "loss": 0.1125,
      "step": 2375
    },
    {
      "epoch": 1.5276893698281349,
      "grad_norm": 0.9741705656051636,
      "learning_rate": 0.0001372285714285714,
      "loss": 0.1122,
      "step": 2400
    },
    {
      "epoch": 1.5436028007638447,
      "grad_norm": 1.1237064599990845,
      "learning_rate": 0.00013508571428571427,
      "loss": 0.1135,
      "step": 2425
    },
    {
      "epoch": 1.5595162316995546,
      "grad_norm": 0.9983360767364502,
      "learning_rate": 0.00013294285714285711,
      "loss": 0.1053,
      "step": 2450
    },
    {
      "epoch": 1.5754296626352642,
      "grad_norm": 0.941571831703186,
      "learning_rate": 0.00013079999999999998,
      "loss": 0.1104,
      "step": 2475
    },
    {
      "epoch": 1.5913430935709738,
      "grad_norm": 0.8732393383979797,
      "learning_rate": 0.00012865714285714285,
      "loss": 0.1073,
      "step": 2500
    },
    {
      "epoch": 1.6072565245066837,
      "grad_norm": 1.1299751996994019,
      "learning_rate": 0.0001265142857142857,
      "loss": 0.1121,
      "step": 2525
    },
    {
      "epoch": 1.6231699554423935,
      "grad_norm": 0.8921777606010437,
      "learning_rate": 0.00012437142857142855,
      "loss": 0.1097,
      "step": 2550
    },
    {
      "epoch": 1.6390833863781031,
      "grad_norm": 0.8513890504837036,
      "learning_rate": 0.00012222857142857142,
      "loss": 0.1082,
      "step": 2575
    },
    {
      "epoch": 1.6549968173138128,
      "grad_norm": 1.0893051624298096,
      "learning_rate": 0.00012008571428571428,
      "loss": 0.1029,
      "step": 2600
    },
    {
      "epoch": 1.6709102482495226,
      "grad_norm": 0.8744276165962219,
      "learning_rate": 0.00011794285714285713,
      "loss": 0.1086,
      "step": 2625
    },
    {
      "epoch": 1.6868236791852325,
      "grad_norm": 1.1604117155075073,
      "learning_rate": 0.0001158,
      "loss": 0.1008,
      "step": 2650
    },
    {
      "epoch": 1.702737110120942,
      "grad_norm": 0.9912447929382324,
      "learning_rate": 0.00011365714285714284,
      "loss": 0.1022,
      "step": 2675
    },
    {
      "epoch": 1.7186505410566517,
      "grad_norm": 1.088443636894226,
      "learning_rate": 0.0001115142857142857,
      "loss": 0.1029,
      "step": 2700
    },
    {
      "epoch": 1.7345639719923616,
      "grad_norm": 1.1775622367858887,
      "learning_rate": 0.00010937142857142856,
      "loss": 0.1041,
      "step": 2725
    },
    {
      "epoch": 1.7504774029280714,
      "grad_norm": 1.1188244819641113,
      "learning_rate": 0.00010722857142857142,
      "loss": 0.1068,
      "step": 2750
    },
    {
      "epoch": 1.766390833863781,
      "grad_norm": 0.9570010900497437,
      "learning_rate": 0.00010508571428571429,
      "loss": 0.098,
      "step": 2775
    },
    {
      "epoch": 1.7823042647994907,
      "grad_norm": 1.4812625646591187,
      "learning_rate": 0.00010294285714285713,
      "loss": 0.0949,
      "step": 2800
    },
    {
      "epoch": 1.7982176957352005,
      "grad_norm": 0.8738514184951782,
      "learning_rate": 0.0001008,
      "loss": 0.1003,
      "step": 2825
    },
    {
      "epoch": 1.8141311266709104,
      "grad_norm": 0.9198557734489441,
      "learning_rate": 9.865714285714285e-05,
      "loss": 0.0913,
      "step": 2850
    },
    {
      "epoch": 1.83004455760662,
      "grad_norm": 0.8232097625732422,
      "learning_rate": 9.65142857142857e-05,
      "loss": 0.1017,
      "step": 2875
    },
    {
      "epoch": 1.8459579885423296,
      "grad_norm": 0.7927132844924927,
      "learning_rate": 9.437142857142856e-05,
      "loss": 0.1006,
      "step": 2900
    },
    {
      "epoch": 1.8618714194780395,
      "grad_norm": 0.8228179812431335,
      "learning_rate": 9.222857142857142e-05,
      "loss": 0.0954,
      "step": 2925
    },
    {
      "epoch": 1.8777848504137493,
      "grad_norm": 1.0207316875457764,
      "learning_rate": 9.008571428571428e-05,
      "loss": 0.1015,
      "step": 2950
    },
    {
      "epoch": 1.893698281349459,
      "grad_norm": 1.3909655809402466,
      "learning_rate": 8.794285714285713e-05,
      "loss": 0.0966,
      "step": 2975
    },
    {
      "epoch": 1.9096117122851686,
      "grad_norm": 0.8367329835891724,
      "learning_rate": 8.579999999999998e-05,
      "loss": 0.1022,
      "step": 3000
    },
    {
      "epoch": 1.9096117122851686,
      "eval_loss": 0.11972030252218246,
      "eval_runtime": 3680.6627,
      "eval_samples_per_second": 2.269,
      "eval_steps_per_second": 0.284,
      "eval_wer": 39.05306890334718,
      "step": 3000
    },
    {
      "epoch": 1.9255251432208784,
      "grad_norm": 0.8776394724845886,
      "learning_rate": 8.365714285714285e-05,
      "loss": 0.0951,
      "step": 3025
    },
    {
      "epoch": 1.9414385741565883,
      "grad_norm": 0.7883875966072083,
      "learning_rate": 8.151428571428572e-05,
      "loss": 0.0887,
      "step": 3050
    },
    {
      "epoch": 1.957352005092298,
      "grad_norm": 0.9402434229850769,
      "learning_rate": 7.937142857142856e-05,
      "loss": 0.0989,
      "step": 3075
    },
    {
      "epoch": 1.9732654360280075,
      "grad_norm": 0.9308575391769409,
      "learning_rate": 7.722857142857143e-05,
      "loss": 0.0913,
      "step": 3100
    },
    {
      "epoch": 1.9891788669637174,
      "grad_norm": 0.8554713129997253,
      "learning_rate": 7.508571428571428e-05,
      "loss": 0.0841,
      "step": 3125
    },
    {
      "epoch": 2.0050922978994272,
      "grad_norm": 0.6500595211982727,
      "learning_rate": 7.294285714285713e-05,
      "loss": 0.0778,
      "step": 3150
    },
    {
      "epoch": 2.021005728835137,
      "grad_norm": 0.8015878200531006,
      "learning_rate": 7.079999999999999e-05,
      "loss": 0.0485,
      "step": 3175
    },
    {
      "epoch": 2.0369191597708465,
      "grad_norm": 0.9871166944503784,
      "learning_rate": 6.865714285714285e-05,
      "loss": 0.0538,
      "step": 3200
    },
    {
      "epoch": 2.0528325907065565,
      "grad_norm": 0.7539300918579102,
      "learning_rate": 6.65142857142857e-05,
      "loss": 0.0567,
      "step": 3225
    },
    {
      "epoch": 2.068746021642266,
      "grad_norm": 0.7191395163536072,
      "learning_rate": 6.437142857142857e-05,
      "loss": 0.0474,
      "step": 3250
    },
    {
      "epoch": 2.084659452577976,
      "grad_norm": 0.8346642851829529,
      "learning_rate": 6.222857142857143e-05,
      "loss": 0.0565,
      "step": 3275
    },
    {
      "epoch": 2.1005728835136854,
      "grad_norm": 0.890232264995575,
      "learning_rate": 6.008571428571428e-05,
      "loss": 0.0513,
      "step": 3300
    },
    {
      "epoch": 2.1164863144493955,
      "grad_norm": 0.7644281983375549,
      "learning_rate": 5.794285714285714e-05,
      "loss": 0.0494,
      "step": 3325
    },
    {
      "epoch": 2.132399745385105,
      "grad_norm": 1.0637160539627075,
      "learning_rate": 5.5799999999999994e-05,
      "loss": 0.0481,
      "step": 3350
    },
    {
      "epoch": 2.1483131763208148,
      "grad_norm": 0.7097823619842529,
      "learning_rate": 5.3657142857142855e-05,
      "loss": 0.0466,
      "step": 3375
    },
    {
      "epoch": 2.1642266072565244,
      "grad_norm": 1.0402812957763672,
      "learning_rate": 5.151428571428571e-05,
      "loss": 0.053,
      "step": 3400
    },
    {
      "epoch": 2.1801400381922345,
      "grad_norm": 1.0634698867797852,
      "learning_rate": 4.937142857142856e-05,
      "loss": 0.0486,
      "step": 3425
    },
    {
      "epoch": 2.196053469127944,
      "grad_norm": 0.5193982720375061,
      "learning_rate": 4.722857142857142e-05,
      "loss": 0.0468,
      "step": 3450
    },
    {
      "epoch": 2.2119669000636537,
      "grad_norm": 0.6877008676528931,
      "learning_rate": 4.5085714285714275e-05,
      "loss": 0.0489,
      "step": 3475
    },
    {
      "epoch": 2.2278803309993633,
      "grad_norm": 0.7828670740127563,
      "learning_rate": 4.294285714285714e-05,
      "loss": 0.0461,
      "step": 3500
    },
    {
      "epoch": 2.2437937619350734,
      "grad_norm": 0.714133620262146,
      "learning_rate": 4.08e-05,
      "loss": 0.047,
      "step": 3525
    },
    {
      "epoch": 2.259707192870783,
      "grad_norm": 0.6755720376968384,
      "learning_rate": 3.8657142857142856e-05,
      "loss": 0.0456,
      "step": 3550
    },
    {
      "epoch": 2.2756206238064927,
      "grad_norm": 0.908721387386322,
      "learning_rate": 3.651428571428571e-05,
      "loss": 0.0427,
      "step": 3575
    },
    {
      "epoch": 2.2915340547422023,
      "grad_norm": 0.703593373298645,
      "learning_rate": 3.437142857142857e-05,
      "loss": 0.049,
      "step": 3600
    },
    {
      "epoch": 2.307447485677912,
      "grad_norm": 0.46566998958587646,
      "learning_rate": 3.222857142857142e-05,
      "loss": 0.0436,
      "step": 3625
    },
    {
      "epoch": 2.323360916613622,
      "grad_norm": 0.7065137028694153,
      "learning_rate": 3.0085714285714283e-05,
      "loss": 0.0429,
      "step": 3650
    },
    {
      "epoch": 2.3392743475493316,
      "grad_norm": 0.7245275974273682,
      "learning_rate": 2.794285714285714e-05,
      "loss": 0.0436,
      "step": 3675
    },
    {
      "epoch": 2.3551877784850412,
      "grad_norm": 0.5561261177062988,
      "learning_rate": 2.5799999999999997e-05,
      "loss": 0.0466,
      "step": 3700
    },
    {
      "epoch": 2.3711012094207513,
      "grad_norm": 0.6039161086082458,
      "learning_rate": 2.3657142857142857e-05,
      "loss": 0.0416,
      "step": 3725
    },
    {
      "epoch": 2.387014640356461,
      "grad_norm": 0.5791841149330139,
      "learning_rate": 2.1514285714285714e-05,
      "loss": 0.042,
      "step": 3750
    },
    {
      "epoch": 2.4029280712921706,
      "grad_norm": 0.5699833631515503,
      "learning_rate": 1.937142857142857e-05,
      "loss": 0.0433,
      "step": 3775
    },
    {
      "epoch": 2.41884150222788,
      "grad_norm": 0.8795793056488037,
      "learning_rate": 1.7228571428571428e-05,
      "loss": 0.0428,
      "step": 3800
    },
    {
      "epoch": 2.4347549331635903,
      "grad_norm": 0.7127372622489929,
      "learning_rate": 1.5085714285714285e-05,
      "loss": 0.0386,
      "step": 3825
    },
    {
      "epoch": 2.4506683640993,
      "grad_norm": 0.7226797342300415,
      "learning_rate": 1.2942857142857141e-05,
      "loss": 0.0443,
      "step": 3850
    },
    {
      "epoch": 2.4665817950350095,
      "grad_norm": 0.738706648349762,
      "learning_rate": 1.0799999999999998e-05,
      "loss": 0.0426,
      "step": 3875
    },
    {
      "epoch": 2.482495225970719,
      "grad_norm": 0.9381580352783203,
      "learning_rate": 8.657142857142855e-06,
      "loss": 0.0436,
      "step": 3900
    },
    {
      "epoch": 2.498408656906429,
      "grad_norm": 0.863370418548584,
      "learning_rate": 6.514285714285714e-06,
      "loss": 0.0436,
      "step": 3925
    },
    {
      "epoch": 2.514322087842139,
      "grad_norm": 0.7624787092208862,
      "learning_rate": 4.371428571428571e-06,
      "loss": 0.0427,
      "step": 3950
    },
    {
      "epoch": 2.5302355187778485,
      "grad_norm": 0.6790493726730347,
      "learning_rate": 2.228571428571428e-06,
      "loss": 0.0423,
      "step": 3975
    },
    {
      "epoch": 2.546148949713558,
      "grad_norm": 0.7205957174301147,
      "learning_rate": 8.571428571428572e-08,
      "loss": 0.046,
      "step": 4000
    },
    {
      "epoch": 2.546148949713558,
      "eval_loss": 0.10670246928930283,
      "eval_runtime": 3687.1388,
      "eval_samples_per_second": 2.265,
      "eval_steps_per_second": 0.283,
      "eval_wer": 34.549659010739205,
      "step": 4000
    }
  ],
  "logging_steps": 25,
  "max_steps": 4000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "total_flos": 4.15026143428608e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}