{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4240956992309883, "eval_steps": 200, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.028481913984619765, "eval_loss": 1.2521600723266602, "eval_runtime": 184.7297, "eval_samples_per_second": 38.012, "eval_steps_per_second": 0.595, "eval_wer": 0.6291606319509959, "step": 200 }, { "epoch": 0.05696382796923953, "eval_loss": 0.6599467396736145, "eval_runtime": 185.0187, "eval_samples_per_second": 37.953, "eval_steps_per_second": 0.595, "eval_wer": 0.45444398676570247, "step": 400 }, { "epoch": 0.07120478496154942, "grad_norm": 18.865299224853516, "learning_rate": 0.00014879999999999998, "loss": 2.2791, "step": 500 }, { "epoch": 0.0854457419538593, "eval_loss": 0.6628636717796326, "eval_runtime": 185.7673, "eval_samples_per_second": 37.8, "eval_steps_per_second": 0.592, "eval_wer": 0.4394557461566059, "step": 600 }, { "epoch": 0.11392765593847906, "eval_loss": 0.7910040020942688, "eval_runtime": 186.4058, "eval_samples_per_second": 37.671, "eval_steps_per_second": 0.59, "eval_wer": 0.5453035517346763, "step": 800 }, { "epoch": 0.14240956992309883, "grad_norm": 3.8627092838287354, "learning_rate": 0.0002988, "loss": 0.8206, "step": 1000 }, { "epoch": 0.14240956992309883, "eval_loss": 0.7757941484451294, "eval_runtime": 186.8087, "eval_samples_per_second": 37.589, "eval_steps_per_second": 0.589, "eval_wer": 0.5701245033816553, "step": 1000 }, { "epoch": 0.1708914839077186, "eval_loss": 0.802534818649292, "eval_runtime": 187.4308, "eval_samples_per_second": 37.464, "eval_steps_per_second": 0.587, "eval_wer": 0.5782564211589312, "step": 1200 }, { "epoch": 0.19937339789233838, "eval_loss": 0.7715001106262207, "eval_runtime": 187.8412, "eval_samples_per_second": 37.383, "eval_steps_per_second": 0.586, "eval_wer": 0.5211336850077731, "step": 1400 }, { "epoch": 0.21361435488464825, "grad_norm": 11.042049407958984, "learning_rate": 0.00028346666666666665, "loss": 0.9068, "step": 1500 }, { "epoch": 0.22785531187695812, "eval_loss": 0.7349154949188232, "eval_runtime": 191.6788, "eval_samples_per_second": 36.634, "eval_steps_per_second": 0.574, "eval_wer": 0.512775880625573, "step": 1600 }, { "epoch": 0.2563372258615779, "eval_loss": 0.7257962226867676, "eval_runtime": 189.501, "eval_samples_per_second": 37.055, "eval_steps_per_second": 0.58, "eval_wer": 0.5152473458323922, "step": 1800 }, { "epoch": 0.28481913984619767, "grad_norm": 6.190296649932861, "learning_rate": 0.0002668, "loss": 0.8679, "step": 2000 }, { "epoch": 0.28481913984619767, "eval_loss": 0.7084089517593384, "eval_runtime": 188.267, "eval_samples_per_second": 37.298, "eval_steps_per_second": 0.584, "eval_wer": 0.5216386080070158, "step": 2000 }, { "epoch": 0.3133010538308174, "eval_loss": 0.6904259324073792, "eval_runtime": 188.556, "eval_samples_per_second": 37.241, "eval_steps_per_second": 0.583, "eval_wer": 0.5014151131426142, "step": 2200 }, { "epoch": 0.3417829678154372, "eval_loss": 0.6992842555046082, "eval_runtime": 189.0868, "eval_samples_per_second": 37.136, "eval_steps_per_second": 0.582, "eval_wer": 0.5177586733812567, "step": 2400 }, { "epoch": 0.3560239248077471, "grad_norm": 4.8257222175598145, "learning_rate": 0.0002501333333333333, "loss": 0.8577, "step": 2500 }, { "epoch": 0.37026488180005696, "eval_loss": 0.6746060848236084, "eval_runtime": 190.1492, "eval_samples_per_second": 36.929, "eval_steps_per_second": 0.578, "eval_wer": 0.48673248382253287, "step": 2600 }, { "epoch": 0.39874679578467676, "eval_loss": 0.6621994972229004, "eval_runtime": 189.6459, "eval_samples_per_second": 37.027, "eval_steps_per_second": 0.58, "eval_wer": 0.4962595835714001, "step": 2800 }, { "epoch": 0.4272287097692965, "grad_norm": 3.6695899963378906, "learning_rate": 0.00023346666666666666, "loss": 0.7995, "step": 3000 }, { "epoch": 0.4272287097692965, "eval_loss": 0.6793097853660583, "eval_runtime": 188.7722, "eval_samples_per_second": 37.198, "eval_steps_per_second": 0.583, "eval_wer": 0.49348250707556574, "step": 3000 }, { "epoch": 0.45571062375391624, "eval_loss": 0.6368467211723328, "eval_runtime": 188.0679, "eval_samples_per_second": 37.338, "eval_steps_per_second": 0.585, "eval_wer": 0.47005673740017806, "step": 3200 }, { "epoch": 0.48419253773853604, "eval_loss": 0.6363435387611389, "eval_runtime": 188.2666, "eval_samples_per_second": 37.298, "eval_steps_per_second": 0.584, "eval_wer": 0.478055780703969, "step": 3400 }, { "epoch": 0.4984334947308459, "grad_norm": 3.4502739906311035, "learning_rate": 0.0002168333333333333, "loss": 0.8141, "step": 3500 }, { "epoch": 0.5126744517231558, "eval_loss": 0.6217373609542847, "eval_runtime": 187.6755, "eval_samples_per_second": 37.416, "eval_steps_per_second": 0.586, "eval_wer": 0.46555229274904, "step": 3600 }, { "epoch": 0.5411563657077756, "eval_loss": 0.641762912273407, "eval_runtime": 186.9231, "eval_samples_per_second": 37.566, "eval_steps_per_second": 0.588, "eval_wer": 0.4940140049695053, "step": 3800 }, { "epoch": 0.5696382796923953, "grad_norm": 5.877405643463135, "learning_rate": 0.00020016666666666666, "loss": 0.7953, "step": 4000 }, { "epoch": 0.5696382796923953, "eval_loss": 0.6017736196517944, "eval_runtime": 182.787, "eval_samples_per_second": 38.416, "eval_steps_per_second": 0.602, "eval_wer": 0.4542313876081266, "step": 4000 }, { "epoch": 0.5981201936770151, "eval_loss": 0.5962206721305847, "eval_runtime": 183.0007, "eval_samples_per_second": 38.371, "eval_steps_per_second": 0.601, "eval_wer": 0.4580315975497947, "step": 4200 }, { "epoch": 0.6266021076616348, "eval_loss": 0.5883399844169617, "eval_runtime": 182.7298, "eval_samples_per_second": 38.428, "eval_steps_per_second": 0.602, "eval_wer": 0.44590015812062345, "step": 4400 }, { "epoch": 0.6408430646539447, "grad_norm": 3.615546226501465, "learning_rate": 0.0001835333333333333, "loss": 0.7596, "step": 4500 }, { "epoch": 0.6550840216462547, "eval_loss": 0.578825056552887, "eval_runtime": 183.3674, "eval_samples_per_second": 38.295, "eval_steps_per_second": 0.6, "eval_wer": 0.43253298608804264, "step": 4600 }, { "epoch": 0.6835659356308744, "eval_loss": 0.5708740949630737, "eval_runtime": 182.6951, "eval_samples_per_second": 38.436, "eval_steps_per_second": 0.602, "eval_wer": 0.4412362641013035, "step": 4800 }, { "epoch": 0.7120478496154942, "grad_norm": 4.345168590545654, "learning_rate": 0.0001669, "loss": 0.7533, "step": 5000 }, { "epoch": 0.7120478496154942, "eval_loss": 0.5594890117645264, "eval_runtime": 182.5857, "eval_samples_per_second": 38.459, "eval_steps_per_second": 0.602, "eval_wer": 0.4352170504524376, "step": 5000 }, { "epoch": 0.7405297636001139, "eval_loss": 0.5545539259910583, "eval_runtime": 182.2233, "eval_samples_per_second": 38.535, "eval_steps_per_second": 0.604, "eval_wer": 0.4231786231547057, "step": 5200 }, { "epoch": 0.7690116775847337, "eval_loss": 0.5545418858528137, "eval_runtime": 182.2691, "eval_samples_per_second": 38.525, "eval_steps_per_second": 0.604, "eval_wer": 0.4244276432054638, "step": 5400 }, { "epoch": 0.7832526345770435, "grad_norm": 9.471431732177734, "learning_rate": 0.00015026666666666667, "loss": 0.7591, "step": 5500 }, { "epoch": 0.7974935915693535, "eval_loss": 0.5442594885826111, "eval_runtime": 182.3947, "eval_samples_per_second": 38.499, "eval_steps_per_second": 0.603, "eval_wer": 0.4076455972043211, "step": 5600 }, { "epoch": 0.8259755055539733, "eval_loss": 0.5341240763664246, "eval_runtime": 182.0603, "eval_samples_per_second": 38.57, "eval_steps_per_second": 0.604, "eval_wer": 0.41462150706227824, "step": 5800 }, { "epoch": 0.854457419538593, "grad_norm": 4.406210422515869, "learning_rate": 0.00013363333333333332, "loss": 0.6621, "step": 6000 }, { "epoch": 0.854457419538593, "eval_loss": 0.5104002952575684, "eval_runtime": 181.8706, "eval_samples_per_second": 38.61, "eval_steps_per_second": 0.605, "eval_wer": 0.3955141577751498, "step": 6000 }, { "epoch": 0.8829393335232127, "eval_loss": 0.5139421820640564, "eval_runtime": 181.902, "eval_samples_per_second": 38.603, "eval_steps_per_second": 0.605, "eval_wer": 0.40112146055621256, "step": 6200 }, { "epoch": 0.9114212475078325, "eval_loss": 0.5044221878051758, "eval_runtime": 181.9538, "eval_samples_per_second": 38.592, "eval_steps_per_second": 0.605, "eval_wer": 0.38039304269256835, "step": 6400 }, { "epoch": 0.9256622045001424, "grad_norm": 8.09687328338623, "learning_rate": 0.000117, "loss": 0.6705, "step": 6500 }, { "epoch": 0.9399031614924523, "eval_loss": 0.49985769391059875, "eval_runtime": 182.1414, "eval_samples_per_second": 38.552, "eval_steps_per_second": 0.604, "eval_wer": 0.3896012437050718, "step": 6600 }, { "epoch": 0.9683850754770721, "eval_loss": 0.5097447037696838, "eval_runtime": 181.5418, "eval_samples_per_second": 38.68, "eval_steps_per_second": 0.606, "eval_wer": 0.4052804315762899, "step": 6800 }, { "epoch": 0.9968669894616918, "grad_norm": 4.639442443847656, "learning_rate": 0.00010033333333333332, "loss": 0.6665, "step": 7000 }, { "epoch": 0.9968669894616918, "eval_loss": 0.49253013730049133, "eval_runtime": 181.6405, "eval_samples_per_second": 38.659, "eval_steps_per_second": 0.606, "eval_wer": 0.3784796502743858, "step": 7000 }, { "epoch": 1.0253489034463117, "eval_loss": 0.4896470010280609, "eval_runtime": 181.3934, "eval_samples_per_second": 38.711, "eval_steps_per_second": 0.606, "eval_wer": 0.3688728258414276, "step": 7200 }, { "epoch": 1.0538308174309314, "eval_loss": 0.47494611144065857, "eval_runtime": 181.7386, "eval_samples_per_second": 38.638, "eval_steps_per_second": 0.605, "eval_wer": 0.3687399513679427, "step": 7400 }, { "epoch": 1.0680717744232413, "grad_norm": 0.6623511910438538, "learning_rate": 8.366666666666666e-05, "loss": 0.5826, "step": 7500 }, { "epoch": 1.0823127314155512, "eval_loss": 0.4684299826622009, "eval_runtime": 182.4026, "eval_samples_per_second": 38.497, "eval_steps_per_second": 0.603, "eval_wer": 0.3628004624031677, "step": 7600 }, { "epoch": 1.110794645400171, "eval_loss": 0.47290024161338806, "eval_runtime": 182.1043, "eval_samples_per_second": 38.56, "eval_steps_per_second": 0.604, "eval_wer": 0.358495329462257, "step": 7800 }, { "epoch": 1.1392765593847907, "grad_norm": 2.393817186355591, "learning_rate": 6.699999999999999e-05, "loss": 0.5836, "step": 8000 }, { "epoch": 1.1392765593847907, "eval_loss": 0.46409761905670166, "eval_runtime": 181.7327, "eval_samples_per_second": 38.639, "eval_steps_per_second": 0.605, "eval_wer": 0.3553196295459679, "step": 8000 }, { "epoch": 1.1677584733694104, "eval_loss": 0.45749881863594055, "eval_runtime": 181.5866, "eval_samples_per_second": 38.67, "eval_steps_per_second": 0.606, "eval_wer": 0.3529810388126337, "step": 8200 }, { "epoch": 1.1962403873540302, "eval_loss": 0.45851147174835205, "eval_runtime": 181.5801, "eval_samples_per_second": 38.672, "eval_steps_per_second": 0.606, "eval_wer": 0.3485563188455866, "step": 8400 }, { "epoch": 1.21048134434634, "grad_norm": 1.9676859378814697, "learning_rate": 5.033333333333333e-05, "loss": 0.5199, "step": 8500 }, { "epoch": 1.22472230133865, "eval_loss": 0.4548875391483307, "eval_runtime": 182.6274, "eval_samples_per_second": 38.45, "eval_steps_per_second": 0.602, "eval_wer": 0.3450750076402822, "step": 8600 }, { "epoch": 1.2532042153232696, "eval_loss": 0.4520675539970398, "eval_runtime": 182.8881, "eval_samples_per_second": 38.395, "eval_steps_per_second": 0.601, "eval_wer": 0.34082302448876545, "step": 8800 }, { "epoch": 1.2816861293078894, "grad_norm": 1.1400251388549805, "learning_rate": 3.373333333333333e-05, "loss": 0.5268, "step": 9000 }, { "epoch": 1.2816861293078894, "eval_loss": 0.44252264499664307, "eval_runtime": 182.3349, "eval_samples_per_second": 38.512, "eval_steps_per_second": 0.603, "eval_wer": 0.33950756720126496, "step": 9000 }, { "epoch": 1.3101680432925091, "eval_loss": 0.44072064757347107, "eval_runtime": 184.1579, "eval_samples_per_second": 38.13, "eval_steps_per_second": 0.597, "eval_wer": 0.3361857053641425, "step": 9200 }, { "epoch": 1.338649957277129, "eval_loss": 0.4383063018321991, "eval_runtime": 181.6966, "eval_samples_per_second": 38.647, "eval_steps_per_second": 0.605, "eval_wer": 0.33397998910429316, "step": 9400 }, { "epoch": 1.352890914269439, "grad_norm": 1.0755033493041992, "learning_rate": 1.71e-05, "loss": 0.5013, "step": 9500 }, { "epoch": 1.3671318712617488, "eval_loss": 0.4356846809387207, "eval_runtime": 183.1225, "eval_samples_per_second": 38.346, "eval_steps_per_second": 0.601, "eval_wer": 0.33253165734330775, "step": 9600 }, { "epoch": 1.3956137852463686, "eval_loss": 0.43495818972587585, "eval_runtime": 182.2639, "eval_samples_per_second": 38.527, "eval_steps_per_second": 0.604, "eval_wer": 0.3316812607130044, "step": 9800 }, { "epoch": 1.4240956992309883, "grad_norm": 1.6312005519866943, "learning_rate": 4.666666666666666e-07, "loss": 0.5095, "step": 10000 }, { "epoch": 1.4240956992309883, "eval_loss": 0.43451622128486633, "eval_runtime": 182.2078, "eval_samples_per_second": 38.538, "eval_steps_per_second": 0.604, "eval_wer": 0.3308175766353526, "step": 10000 }, { "epoch": 1.4240956992309883, "step": 10000, "total_flos": 4.5974516642218747e+18, "train_loss": 0.7817989181518554, "train_runtime": 11412.7197, "train_samples_per_second": 3.505, "train_steps_per_second": 0.876 } ], "logging_steps": 500, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.5974516642218747e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }