{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9357336430507162, "eval_steps": 100, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019357336430507164, "eval_loss": 3.5565404891967773, "eval_runtime": 151.5266, "eval_samples_per_second": 37.327, "eval_steps_per_second": 4.666, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.03871467286101433, "eval_loss": 3.0301756858825684, "eval_runtime": 150.582, "eval_samples_per_second": 37.561, "eval_steps_per_second": 4.695, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.05807200929152149, "eval_loss": 2.9460911750793457, "eval_runtime": 148.9065, "eval_samples_per_second": 37.984, "eval_steps_per_second": 4.748, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.07742934572202866, "eval_loss": 1.8142520189285278, "eval_runtime": 149.8655, "eval_samples_per_second": 37.741, "eval_steps_per_second": 4.718, "eval_wer": 0.940732775914365, "step": 400 }, { "epoch": 0.09678668215253582, "grad_norm": 3.132490396499634, "learning_rate": 0.00029759999999999997, "loss": 3.9521, "step": 500 }, { "epoch": 0.09678668215253582, "eval_loss": 1.4195518493652344, "eval_runtime": 150.5171, "eval_samples_per_second": 37.577, "eval_steps_per_second": 4.697, "eval_wer": 0.8693007655149171, "step": 500 }, { "epoch": 0.11614401858304298, "eval_loss": 1.16689133644104, "eval_runtime": 150.5387, "eval_samples_per_second": 37.572, "eval_steps_per_second": 4.696, "eval_wer": 0.8055239042865626, "step": 600 }, { "epoch": 0.13550135501355012, "eval_loss": 1.0756505727767944, "eval_runtime": 151.2385, "eval_samples_per_second": 37.398, "eval_steps_per_second": 4.675, "eval_wer": 0.7596251063215163, "step": 700 }, { "epoch": 0.1548586914440573, "eval_loss": 0.9944618344306946, "eval_runtime": 151.1646, "eval_samples_per_second": 37.416, "eval_steps_per_second": 4.677, "eval_wer": 0.7223925149652549, "step": 800 }, { "epoch": 0.17421602787456447, "eval_loss": 0.9381263256072998, "eval_runtime": 151.6289, "eval_samples_per_second": 37.302, "eval_steps_per_second": 4.663, "eval_wer": 0.6870857472998347, "step": 900 }, { "epoch": 0.19357336430507163, "grad_norm": 7.335289001464844, "learning_rate": 0.0002844, "loss": 1.0266, "step": 1000 }, { "epoch": 0.19357336430507163, "eval_loss": 0.8977694511413574, "eval_runtime": 156.0202, "eval_samples_per_second": 36.252, "eval_steps_per_second": 4.531, "eval_wer": 0.661472292211648, "step": 1000 }, { "epoch": 0.2129307007355788, "eval_loss": 0.8770694136619568, "eval_runtime": 151.6589, "eval_samples_per_second": 37.294, "eval_steps_per_second": 4.662, "eval_wer": 0.6450385967164706, "step": 1100 }, { "epoch": 0.23228803716608595, "eval_loss": 0.851553201675415, "eval_runtime": 151.5945, "eval_samples_per_second": 37.31, "eval_steps_per_second": 4.664, "eval_wer": 0.640432668389209, "step": 1200 }, { "epoch": 0.2516453735965931, "eval_loss": 0.8273979425430298, "eval_runtime": 151.4524, "eval_samples_per_second": 37.345, "eval_steps_per_second": 4.668, "eval_wer": 0.6138081558633307, "step": 1300 }, { "epoch": 0.27100271002710025, "eval_loss": 0.7992698550224304, "eval_runtime": 152.8076, "eval_samples_per_second": 37.014, "eval_steps_per_second": 4.627, "eval_wer": 0.596973247099228, "step": 1400 }, { "epoch": 0.29036004645760743, "grad_norm": 4.0737223625183105, "learning_rate": 0.00026861052631578947, "loss": 0.8454, "step": 1500 }, { "epoch": 0.29036004645760743, "eval_loss": 0.7768516540527344, "eval_runtime": 152.3743, "eval_samples_per_second": 37.119, "eval_steps_per_second": 4.64, "eval_wer": 0.5887563993516394, "step": 1500 }, { "epoch": 0.3097173828881146, "eval_loss": 0.7664207220077515, "eval_runtime": 154.3668, "eval_samples_per_second": 36.64, "eval_steps_per_second": 4.58, "eval_wer": 0.5997977885124617, "step": 1600 }, { "epoch": 0.32907471931862176, "eval_loss": 0.7400562763214111, "eval_runtime": 153.7228, "eval_samples_per_second": 36.793, "eval_steps_per_second": 4.599, "eval_wer": 0.5592110542279854, "step": 1700 }, { "epoch": 0.34843205574912894, "eval_loss": 0.746478796005249, "eval_runtime": 151.7535, "eval_samples_per_second": 37.271, "eval_steps_per_second": 4.659, "eval_wer": 0.5650206223620228, "step": 1800 }, { "epoch": 0.3677893921796361, "eval_loss": 0.7252949476242065, "eval_runtime": 151.7548, "eval_samples_per_second": 37.271, "eval_steps_per_second": 4.659, "eval_wer": 0.5791272808974338, "step": 1900 }, { "epoch": 0.38714672861014326, "grad_norm": 2.4802448749542236, "learning_rate": 0.0002528210526315789, "loss": 0.7537, "step": 2000 }, { "epoch": 0.38714672861014326, "eval_loss": 0.7039346098899841, "eval_runtime": 152.7969, "eval_samples_per_second": 37.016, "eval_steps_per_second": 4.627, "eval_wer": 0.5343518800853782, "step": 2000 }, { "epoch": 0.4065040650406504, "eval_loss": 0.6932350397109985, "eval_runtime": 152.4406, "eval_samples_per_second": 37.103, "eval_steps_per_second": 4.638, "eval_wer": 0.5168429330294811, "step": 2100 }, { "epoch": 0.4258614014711576, "eval_loss": 0.696869432926178, "eval_runtime": 153.0527, "eval_samples_per_second": 36.955, "eval_steps_per_second": 4.619, "eval_wer": 0.5364381890837894, "step": 2200 }, { "epoch": 0.4452187379016647, "eval_loss": 0.6781283617019653, "eval_runtime": 152.1378, "eval_samples_per_second": 37.177, "eval_steps_per_second": 4.647, "eval_wer": 0.5173725345444624, "step": 2300 }, { "epoch": 0.4645760743321719, "eval_loss": 0.6760829091072083, "eval_runtime": 151.9712, "eval_samples_per_second": 37.218, "eval_steps_per_second": 4.652, "eval_wer": 0.5050312143923223, "step": 2400 }, { "epoch": 0.48393341076267904, "grad_norm": 3.791292667388916, "learning_rate": 0.0002370315789473684, "loss": 0.681, "step": 2500 }, { "epoch": 0.48393341076267904, "eval_loss": 0.6720712780952454, "eval_runtime": 152.2414, "eval_samples_per_second": 37.152, "eval_steps_per_second": 4.644, "eval_wer": 0.528718845789668, "step": 2500 }, { "epoch": 0.5032907471931862, "eval_loss": 0.6598270535469055, "eval_runtime": 151.7192, "eval_samples_per_second": 37.279, "eval_steps_per_second": 4.66, "eval_wer": 0.5195069891351447, "step": 2600 }, { "epoch": 0.5226480836236934, "eval_loss": 0.6555168628692627, "eval_runtime": 152.5678, "eval_samples_per_second": 37.072, "eval_steps_per_second": 4.634, "eval_wer": 0.4975846961210701, "step": 2700 }, { "epoch": 0.5420054200542005, "eval_loss": 0.6535276770591736, "eval_runtime": 152.5246, "eval_samples_per_second": 37.083, "eval_steps_per_second": 4.635, "eval_wer": 0.49936608303509816, "step": 2800 }, { "epoch": 0.5613627564847077, "eval_loss": 0.6258506178855896, "eval_runtime": 151.843, "eval_samples_per_second": 37.249, "eval_steps_per_second": 4.656, "eval_wer": 0.48192133010222915, "step": 2900 }, { "epoch": 0.5807200929152149, "grad_norm": 9.4619779586792, "learning_rate": 0.00022124210526315786, "loss": 0.6737, "step": 3000 }, { "epoch": 0.5807200929152149, "eval_loss": 0.629943311214447, "eval_runtime": 151.8389, "eval_samples_per_second": 37.25, "eval_steps_per_second": 4.656, "eval_wer": 0.48022018584198617, "step": 3000 }, { "epoch": 0.6000774293457221, "eval_loss": 0.6378594636917114, "eval_runtime": 151.6255, "eval_samples_per_second": 37.302, "eval_steps_per_second": 4.663, "eval_wer": 0.4893197027812104, "step": 3100 }, { "epoch": 0.6194347657762292, "eval_loss": 0.6225672364234924, "eval_runtime": 153.0144, "eval_samples_per_second": 36.964, "eval_steps_per_second": 4.62, "eval_wer": 0.4806053505801544, "step": 3200 }, { "epoch": 0.6387921022067363, "eval_loss": 0.6088670492172241, "eval_runtime": 152.2222, "eval_samples_per_second": 37.156, "eval_steps_per_second": 4.645, "eval_wer": 0.4627112387860891, "step": 3300 }, { "epoch": 0.6581494386372435, "eval_loss": 0.6028585433959961, "eval_runtime": 153.0615, "eval_samples_per_second": 36.952, "eval_steps_per_second": 4.619, "eval_wer": 0.47354399704707034, "step": 3400 }, { "epoch": 0.6775067750677507, "grad_norm": 3.4705822467803955, "learning_rate": 0.00020545263157894736, "loss": 0.6419, "step": 3500 }, { "epoch": 0.6775067750677507, "eval_loss": 0.5871421694755554, "eval_runtime": 152.5739, "eval_samples_per_second": 37.071, "eval_steps_per_second": 4.634, "eval_wer": 0.4592126590810611, "step": 3500 }, { "epoch": 0.6968641114982579, "eval_loss": 0.6001027226448059, "eval_runtime": 152.1697, "eval_samples_per_second": 37.169, "eval_steps_per_second": 4.646, "eval_wer": 0.4610742886488742, "step": 3600 }, { "epoch": 0.716221447928765, "eval_loss": 0.5848923921585083, "eval_runtime": 152.6563, "eval_samples_per_second": 37.051, "eval_steps_per_second": 4.631, "eval_wer": 0.4472565036670893, "step": 3700 }, { "epoch": 0.7355787843592722, "eval_loss": 0.5923960208892822, "eval_runtime": 152.6559, "eval_samples_per_second": 37.051, "eval_steps_per_second": 4.631, "eval_wer": 0.46377044181605176, "step": 3800 }, { "epoch": 0.7549361207897793, "eval_loss": 0.5767965316772461, "eval_runtime": 152.1652, "eval_samples_per_second": 37.17, "eval_steps_per_second": 4.646, "eval_wer": 0.4584904751969957, "step": 3900 }, { "epoch": 0.7742934572202865, "grad_norm": 3.628082275390625, "learning_rate": 0.00018966315789473683, "loss": 0.6183, "step": 4000 }, { "epoch": 0.7742934572202865, "eval_loss": 0.5672534704208374, "eval_runtime": 152.4329, "eval_samples_per_second": 37.105, "eval_steps_per_second": 4.638, "eval_wer": 0.44531463144549116, "step": 4000 }, { "epoch": 0.7936507936507936, "eval_loss": 0.5575382113456726, "eval_runtime": 152.2388, "eval_samples_per_second": 37.152, "eval_steps_per_second": 4.644, "eval_wer": 0.4451862431994351, "step": 4100 }, { "epoch": 0.8130081300813008, "eval_loss": 0.5631808042526245, "eval_runtime": 152.7545, "eval_samples_per_second": 37.027, "eval_steps_per_second": 4.628, "eval_wer": 0.4474972316284444, "step": 4200 }, { "epoch": 0.832365466511808, "eval_loss": 0.5498641729354858, "eval_runtime": 153.7788, "eval_samples_per_second": 36.78, "eval_steps_per_second": 4.598, "eval_wer": 0.44008281041870617, "step": 4300 }, { "epoch": 0.8517228029423152, "eval_loss": 0.5662574172019958, "eval_runtime": 152.5034, "eval_samples_per_second": 37.088, "eval_steps_per_second": 4.636, "eval_wer": 0.43101539054099597, "step": 4400 }, { "epoch": 0.8710801393728222, "grad_norm": 2.376349925994873, "learning_rate": 0.0001738736842105263, "loss": 0.5877, "step": 4500 }, { "epoch": 0.8710801393728222, "eval_loss": 0.5584732294082642, "eval_runtime": 152.1714, "eval_samples_per_second": 37.169, "eval_steps_per_second": 4.646, "eval_wer": 0.4317215258943044, "step": 4500 }, { "epoch": 0.8904374758033294, "eval_loss": 0.5463821291923523, "eval_runtime": 152.4923, "eval_samples_per_second": 37.09, "eval_steps_per_second": 4.636, "eval_wer": 0.41997400138017366, "step": 4600 }, { "epoch": 0.9097948122338366, "eval_loss": 0.5381494164466858, "eval_runtime": 153.2139, "eval_samples_per_second": 36.916, "eval_steps_per_second": 4.614, "eval_wer": 0.4192197204345942, "step": 4700 }, { "epoch": 0.9291521486643438, "eval_loss": 0.5453722476959229, "eval_runtime": 151.9737, "eval_samples_per_second": 37.217, "eval_steps_per_second": 4.652, "eval_wer": 0.4201986808107718, "step": 4800 }, { "epoch": 0.948509485094851, "eval_loss": 0.5237515568733215, "eval_runtime": 151.8558, "eval_samples_per_second": 37.246, "eval_steps_per_second": 4.656, "eval_wer": 0.41241514339362234, "step": 4900 }, { "epoch": 0.9678668215253581, "grad_norm": 2.5489518642425537, "learning_rate": 0.0001581157894736842, "loss": 0.5621, "step": 5000 }, { "epoch": 0.9678668215253581, "eval_loss": 0.5303541421890259, "eval_runtime": 152.515, "eval_samples_per_second": 37.085, "eval_steps_per_second": 4.636, "eval_wer": 0.41353854054661293, "step": 5000 }, { "epoch": 0.9872241579558653, "eval_loss": 0.5163344740867615, "eval_runtime": 156.7945, "eval_samples_per_second": 36.073, "eval_steps_per_second": 4.509, "eval_wer": 0.4061080708061177, "step": 5100 }, { "epoch": 1.0065814943863725, "eval_loss": 0.51596599817276, "eval_runtime": 153.2891, "eval_samples_per_second": 36.898, "eval_steps_per_second": 4.612, "eval_wer": 0.39927139670363176, "step": 5200 }, { "epoch": 1.0259388308168795, "eval_loss": 0.5088583827018738, "eval_runtime": 152.7112, "eval_samples_per_second": 37.037, "eval_steps_per_second": 4.63, "eval_wer": 0.3898509091492674, "step": 5300 }, { "epoch": 1.0452961672473868, "eval_loss": 0.5110610723495483, "eval_runtime": 152.5555, "eval_samples_per_second": 37.075, "eval_steps_per_second": 4.634, "eval_wer": 0.3985652613503234, "step": 5400 }, { "epoch": 1.064653503677894, "grad_norm": 1.1362248659133911, "learning_rate": 0.0001423578947368421, "loss": 0.4882, "step": 5500 }, { "epoch": 1.064653503677894, "eval_loss": 0.5010027885437012, "eval_runtime": 152.1249, "eval_samples_per_second": 37.18, "eval_steps_per_second": 4.647, "eval_wer": 0.38574248527547306, "step": 5500 }, { "epoch": 1.084010840108401, "eval_loss": 0.49406561255455017, "eval_runtime": 151.5623, "eval_samples_per_second": 37.318, "eval_steps_per_second": 4.665, "eval_wer": 0.3858548249907721, "step": 5600 }, { "epoch": 1.1033681765389083, "eval_loss": 0.49403733015060425, "eval_runtime": 152.7631, "eval_samples_per_second": 37.025, "eval_steps_per_second": 4.628, "eval_wer": 0.3813451878480525, "step": 5700 }, { "epoch": 1.1227255129694154, "eval_loss": 0.4913772642612457, "eval_runtime": 152.1406, "eval_samples_per_second": 37.176, "eval_steps_per_second": 4.647, "eval_wer": 0.37815153022740766, "step": 5800 }, { "epoch": 1.1420828493999227, "eval_loss": 0.48747047781944275, "eval_runtime": 151.3195, "eval_samples_per_second": 37.378, "eval_steps_per_second": 4.672, "eval_wer": 0.3745406108070806, "step": 5900 }, { "epoch": 1.1614401858304297, "grad_norm": 1.0150744915008545, "learning_rate": 0.00012656842105263156, "loss": 0.4569, "step": 6000 }, { "epoch": 1.1614401858304297, "eval_loss": 0.4841971695423126, "eval_runtime": 151.8567, "eval_samples_per_second": 37.246, "eval_steps_per_second": 4.656, "eval_wer": 0.38071929514852915, "step": 6000 }, { "epoch": 1.1807975222609368, "eval_loss": 0.48611822724342346, "eval_runtime": 150.971, "eval_samples_per_second": 37.464, "eval_steps_per_second": 4.683, "eval_wer": 0.37370608720771614, "step": 6100 }, { "epoch": 1.2001548586914441, "eval_loss": 0.48144644498825073, "eval_runtime": 151.4548, "eval_samples_per_second": 37.344, "eval_steps_per_second": 4.668, "eval_wer": 0.3760973182905105, "step": 6200 }, { "epoch": 1.2195121951219512, "eval_loss": 0.47813892364501953, "eval_runtime": 151.1935, "eval_samples_per_second": 37.409, "eval_steps_per_second": 4.676, "eval_wer": 0.37409125194588433, "step": 6300 }, { "epoch": 1.2388695315524583, "eval_loss": 0.4771001935005188, "eval_runtime": 151.1732, "eval_samples_per_second": 37.414, "eval_steps_per_second": 4.677, "eval_wer": 0.36815329556579096, "step": 6400 }, { "epoch": 1.2582268679829656, "grad_norm": 1.3292571306228638, "learning_rate": 0.00011077894736842105, "loss": 0.4416, "step": 6500 }, { "epoch": 1.2582268679829656, "eval_loss": 0.47095027565956116, "eval_runtime": 151.5037, "eval_samples_per_second": 37.332, "eval_steps_per_second": 4.667, "eval_wer": 0.37338511659257595, "step": 6500 }, { "epoch": 1.2775842044134726, "eval_loss": 0.47211408615112305, "eval_runtime": 150.9455, "eval_samples_per_second": 37.47, "eval_steps_per_second": 4.684, "eval_wer": 0.3659706953828377, "step": 6600 }, { "epoch": 1.29694154084398, "eval_loss": 0.4679400622844696, "eval_runtime": 151.4191, "eval_samples_per_second": 37.353, "eval_steps_per_second": 4.669, "eval_wer": 0.3638843863844265, "step": 6700 }, { "epoch": 1.316298877274487, "eval_loss": 0.46228036284446716, "eval_runtime": 151.3839, "eval_samples_per_second": 37.362, "eval_steps_per_second": 4.67, "eval_wer": 0.366532393959333, "step": 6800 }, { "epoch": 1.3356562137049943, "eval_loss": 0.46108925342559814, "eval_runtime": 151.8163, "eval_samples_per_second": 37.256, "eval_steps_per_second": 4.657, "eval_wer": 0.3601771757795574, "step": 6900 }, { "epoch": 1.3550135501355014, "grad_norm": 0.8062695860862732, "learning_rate": 9.498947368421052e-05, "loss": 0.4324, "step": 7000 }, { "epoch": 1.3550135501355014, "eval_loss": 0.46888086199760437, "eval_runtime": 152.4379, "eval_samples_per_second": 37.104, "eval_steps_per_second": 4.638, "eval_wer": 0.3609314567251368, "step": 7000 }, { "epoch": 1.3743708865660085, "eval_loss": 0.4573034346103668, "eval_runtime": 151.3077, "eval_samples_per_second": 37.381, "eval_steps_per_second": 4.673, "eval_wer": 0.3602574184333424, "step": 7100 }, { "epoch": 1.3937282229965158, "eval_loss": 0.45749789476394653, "eval_runtime": 151.5824, "eval_samples_per_second": 37.313, "eval_steps_per_second": 4.664, "eval_wer": 0.3546083356068752, "step": 7200 }, { "epoch": 1.4130855594270229, "eval_loss": 0.4555954933166504, "eval_runtime": 151.6035, "eval_samples_per_second": 37.308, "eval_steps_per_second": 4.663, "eval_wer": 0.35836369180401534, "step": 7300 }, { "epoch": 1.43244289585753, "eval_loss": 0.4495578408241272, "eval_runtime": 152.5621, "eval_samples_per_second": 37.073, "eval_steps_per_second": 4.634, "eval_wer": 0.350724591163679, "step": 7400 }, { "epoch": 1.4518002322880372, "grad_norm": 0.7916799187660217, "learning_rate": 7.92e-05, "loss": 0.4255, "step": 7500 }, { "epoch": 1.4518002322880372, "eval_loss": 0.44609567523002625, "eval_runtime": 151.8498, "eval_samples_per_second": 37.247, "eval_steps_per_second": 4.656, "eval_wer": 0.34671245847442667, "step": 7500 }, { "epoch": 1.4711575687185443, "eval_loss": 0.44341230392456055, "eval_runtime": 152.528, "eval_samples_per_second": 37.082, "eval_steps_per_second": 4.635, "eval_wer": 0.3462470510824734, "step": 7600 }, { "epoch": 1.4905149051490514, "eval_loss": 0.44362780451774597, "eval_runtime": 152.5253, "eval_samples_per_second": 37.082, "eval_steps_per_second": 4.635, "eval_wer": 0.3516393574168285, "step": 7700 }, { "epoch": 1.5098722415795587, "eval_loss": 0.4406072199344635, "eval_runtime": 152.4039, "eval_samples_per_second": 37.112, "eval_steps_per_second": 4.639, "eval_wer": 0.34579769222127715, "step": 7800 }, { "epoch": 1.5292295780100658, "eval_loss": 0.43874725699424744, "eval_runtime": 152.6604, "eval_samples_per_second": 37.05, "eval_steps_per_second": 4.631, "eval_wer": 0.3439360626534641, "step": 7900 }, { "epoch": 1.5485869144405728, "grad_norm": 0.7491864562034607, "learning_rate": 6.344210526315788e-05, "loss": 0.4094, "step": 8000 }, { "epoch": 1.5485869144405728, "eval_loss": 0.43253499269485474, "eval_runtime": 153.8006, "eval_samples_per_second": 36.775, "eval_steps_per_second": 4.597, "eval_wer": 0.3409831329941744, "step": 8000 }, { "epoch": 1.5679442508710801, "eval_loss": 0.4359830617904663, "eval_runtime": 153.3674, "eval_samples_per_second": 36.879, "eval_steps_per_second": 4.61, "eval_wer": 0.3419299963088379, "step": 8100 }, { "epoch": 1.5873015873015874, "eval_loss": 0.4285949170589447, "eval_runtime": 153.3711, "eval_samples_per_second": 36.878, "eval_steps_per_second": 4.61, "eval_wer": 0.3377252812505015, "step": 8200 }, { "epoch": 1.6066589237320945, "eval_loss": 0.43007034063339233, "eval_runtime": 152.2201, "eval_samples_per_second": 37.157, "eval_steps_per_second": 4.645, "eval_wer": 0.3335526632536791, "step": 8300 }, { "epoch": 1.6260162601626016, "eval_loss": 0.42966797947883606, "eval_runtime": 152.0163, "eval_samples_per_second": 37.207, "eval_steps_per_second": 4.651, "eval_wer": 0.3322848293238754, "step": 8400 }, { "epoch": 1.645373596593109, "grad_norm": 1.047472596168518, "learning_rate": 4.765263157894736e-05, "loss": 0.4018, "step": 8500 }, { "epoch": 1.645373596593109, "eval_loss": 0.4270441234111786, "eval_runtime": 152.8058, "eval_samples_per_second": 37.014, "eval_steps_per_second": 4.627, "eval_wer": 0.3338575853380623, "step": 8500 }, { "epoch": 1.664730933023616, "eval_loss": 0.4267289638519287, "eval_runtime": 152.5032, "eval_samples_per_second": 37.088, "eval_steps_per_second": 4.636, "eval_wer": 0.3319959557702492, "step": 8600 }, { "epoch": 1.684088269454123, "eval_loss": 0.4224300980567932, "eval_runtime": 152.5862, "eval_samples_per_second": 37.068, "eval_steps_per_second": 4.633, "eval_wer": 0.33275023671582865, "step": 8700 }, { "epoch": 1.7034456058846303, "eval_loss": 0.4207303822040558, "eval_runtime": 154.5205, "eval_samples_per_second": 36.604, "eval_steps_per_second": 4.575, "eval_wer": 0.32984545264881, "step": 8800 }, { "epoch": 1.7228029423151374, "eval_loss": 0.4197385013103485, "eval_runtime": 152.0624, "eval_samples_per_second": 37.195, "eval_steps_per_second": 4.649, "eval_wer": 0.32978125852578194, "step": 8900 }, { "epoch": 1.7421602787456445, "grad_norm": 1.4507739543914795, "learning_rate": 3.189473684210526e-05, "loss": 0.3899, "step": 9000 }, { "epoch": 1.7421602787456445, "eval_loss": 0.4183507561683655, "eval_runtime": 157.4278, "eval_samples_per_second": 35.928, "eval_steps_per_second": 4.491, "eval_wer": 0.3258493684903147, "step": 9000 }, { "epoch": 1.7615176151761518, "eval_loss": 0.4164830148220062, "eval_runtime": 153.0475, "eval_samples_per_second": 36.956, "eval_steps_per_second": 4.619, "eval_wer": 0.3262024361669689, "step": 9100 }, { "epoch": 1.7808749516066589, "eval_loss": 0.41182050108909607, "eval_runtime": 152.4839, "eval_samples_per_second": 37.092, "eval_steps_per_second": 4.637, "eval_wer": 0.322864341769511, "step": 9200 }, { "epoch": 1.800232288037166, "eval_loss": 0.4134317636489868, "eval_runtime": 152.6353, "eval_samples_per_second": 37.056, "eval_steps_per_second": 4.632, "eval_wer": 0.3232334579769222, "step": 9300 }, { "epoch": 1.8195896244676733, "eval_loss": 0.4126824736595154, "eval_runtime": 152.5246, "eval_samples_per_second": 37.083, "eval_steps_per_second": 4.635, "eval_wer": 0.3209064210171559, "step": 9400 }, { "epoch": 1.8389469608981805, "grad_norm": 1.0012460947036743, "learning_rate": 1.6105263157894736e-05, "loss": 0.3665, "step": 9500 }, { "epoch": 1.8389469608981805, "eval_loss": 0.41083237528800964, "eval_runtime": 152.9993, "eval_samples_per_second": 36.967, "eval_steps_per_second": 4.621, "eval_wer": 0.32109900338624, "step": 9500 }, { "epoch": 1.8583042973286876, "eval_loss": 0.4090138077735901, "eval_runtime": 152.5291, "eval_samples_per_second": 37.081, "eval_steps_per_second": 4.635, "eval_wer": 0.3199114121102213, "step": 9600 }, { "epoch": 1.8776616337591947, "eval_loss": 0.407578706741333, "eval_runtime": 153.0711, "eval_samples_per_second": 36.95, "eval_steps_per_second": 4.619, "eval_wer": 0.32087432395564186, "step": 9700 }, { "epoch": 1.897018970189702, "eval_loss": 0.40649694204330444, "eval_runtime": 154.4136, "eval_samples_per_second": 36.629, "eval_steps_per_second": 4.579, "eval_wer": 0.31981512092567926, "step": 9800 }, { "epoch": 1.916376306620209, "eval_loss": 0.40620651841163635, "eval_runtime": 153.7508, "eval_samples_per_second": 36.787, "eval_steps_per_second": 4.598, "eval_wer": 0.31923737381842693, "step": 9900 }, { "epoch": 1.9357336430507162, "grad_norm": 0.7244949340820312, "learning_rate": 3.157894736842105e-07, "loss": 0.3698, "step": 10000 }, { "epoch": 1.9357336430507162, "eval_loss": 0.4060620963573456, "eval_runtime": 153.976, "eval_samples_per_second": 36.733, "eval_steps_per_second": 4.592, "eval_wer": 0.31928551941069794, "step": 10000 }, { "epoch": 1.9357336430507162, "step": 10000, "total_flos": 1.1255918428180738e+19, "train_loss": 0.7262251342773437, "train_runtime": 19143.2125, "train_samples_per_second": 4.179, "train_steps_per_second": 0.522 } ], "logging_steps": 500, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 400, "total_flos": 1.1255918428180738e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }