|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.4240956992309883, |
|
"eval_steps": 200, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.028481913984619765, |
|
"eval_loss": 1.2521600723266602, |
|
"eval_runtime": 184.7297, |
|
"eval_samples_per_second": 38.012, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 0.6291606319509959, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05696382796923953, |
|
"eval_loss": 0.6599467396736145, |
|
"eval_runtime": 185.0187, |
|
"eval_samples_per_second": 37.953, |
|
"eval_steps_per_second": 0.595, |
|
"eval_wer": 0.45444398676570247, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07120478496154942, |
|
"grad_norm": 18.865299224853516, |
|
"learning_rate": 0.00014879999999999998, |
|
"loss": 2.2791, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0854457419538593, |
|
"eval_loss": 0.6628636717796326, |
|
"eval_runtime": 185.7673, |
|
"eval_samples_per_second": 37.8, |
|
"eval_steps_per_second": 0.592, |
|
"eval_wer": 0.4394557461566059, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11392765593847906, |
|
"eval_loss": 0.7910040020942688, |
|
"eval_runtime": 186.4058, |
|
"eval_samples_per_second": 37.671, |
|
"eval_steps_per_second": 0.59, |
|
"eval_wer": 0.5453035517346763, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.14240956992309883, |
|
"grad_norm": 3.8627092838287354, |
|
"learning_rate": 0.0002988, |
|
"loss": 0.8206, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14240956992309883, |
|
"eval_loss": 0.7757941484451294, |
|
"eval_runtime": 186.8087, |
|
"eval_samples_per_second": 37.589, |
|
"eval_steps_per_second": 0.589, |
|
"eval_wer": 0.5701245033816553, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1708914839077186, |
|
"eval_loss": 0.802534818649292, |
|
"eval_runtime": 187.4308, |
|
"eval_samples_per_second": 37.464, |
|
"eval_steps_per_second": 0.587, |
|
"eval_wer": 0.5782564211589312, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19937339789233838, |
|
"eval_loss": 0.7715001106262207, |
|
"eval_runtime": 187.8412, |
|
"eval_samples_per_second": 37.383, |
|
"eval_steps_per_second": 0.586, |
|
"eval_wer": 0.5211336850077731, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.21361435488464825, |
|
"grad_norm": 11.042049407958984, |
|
"learning_rate": 0.00028346666666666665, |
|
"loss": 0.9068, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22785531187695812, |
|
"eval_loss": 0.7349154949188232, |
|
"eval_runtime": 191.6788, |
|
"eval_samples_per_second": 36.634, |
|
"eval_steps_per_second": 0.574, |
|
"eval_wer": 0.512775880625573, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2563372258615779, |
|
"eval_loss": 0.7257962226867676, |
|
"eval_runtime": 189.501, |
|
"eval_samples_per_second": 37.055, |
|
"eval_steps_per_second": 0.58, |
|
"eval_wer": 0.5152473458323922, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.28481913984619767, |
|
"grad_norm": 6.190296649932861, |
|
"learning_rate": 0.0002668, |
|
"loss": 0.8679, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.28481913984619767, |
|
"eval_loss": 0.7084089517593384, |
|
"eval_runtime": 188.267, |
|
"eval_samples_per_second": 37.298, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.5216386080070158, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3133010538308174, |
|
"eval_loss": 0.6904259324073792, |
|
"eval_runtime": 188.556, |
|
"eval_samples_per_second": 37.241, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.5014151131426142, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3417829678154372, |
|
"eval_loss": 0.6992842555046082, |
|
"eval_runtime": 189.0868, |
|
"eval_samples_per_second": 37.136, |
|
"eval_steps_per_second": 0.582, |
|
"eval_wer": 0.5177586733812567, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3560239248077471, |
|
"grad_norm": 4.8257222175598145, |
|
"learning_rate": 0.0002501333333333333, |
|
"loss": 0.8577, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.37026488180005696, |
|
"eval_loss": 0.6746060848236084, |
|
"eval_runtime": 190.1492, |
|
"eval_samples_per_second": 36.929, |
|
"eval_steps_per_second": 0.578, |
|
"eval_wer": 0.48673248382253287, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.39874679578467676, |
|
"eval_loss": 0.6621994972229004, |
|
"eval_runtime": 189.6459, |
|
"eval_samples_per_second": 37.027, |
|
"eval_steps_per_second": 0.58, |
|
"eval_wer": 0.4962595835714001, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.4272287097692965, |
|
"grad_norm": 3.6695899963378906, |
|
"learning_rate": 0.00023346666666666666, |
|
"loss": 0.7995, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4272287097692965, |
|
"eval_loss": 0.6793097853660583, |
|
"eval_runtime": 188.7722, |
|
"eval_samples_per_second": 37.198, |
|
"eval_steps_per_second": 0.583, |
|
"eval_wer": 0.49348250707556574, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.45571062375391624, |
|
"eval_loss": 0.6368467211723328, |
|
"eval_runtime": 188.0679, |
|
"eval_samples_per_second": 37.338, |
|
"eval_steps_per_second": 0.585, |
|
"eval_wer": 0.47005673740017806, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.48419253773853604, |
|
"eval_loss": 0.6363435387611389, |
|
"eval_runtime": 188.2666, |
|
"eval_samples_per_second": 37.298, |
|
"eval_steps_per_second": 0.584, |
|
"eval_wer": 0.478055780703969, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.4984334947308459, |
|
"grad_norm": 3.4502739906311035, |
|
"learning_rate": 0.0002168333333333333, |
|
"loss": 0.8141, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5126744517231558, |
|
"eval_loss": 0.6217373609542847, |
|
"eval_runtime": 187.6755, |
|
"eval_samples_per_second": 37.416, |
|
"eval_steps_per_second": 0.586, |
|
"eval_wer": 0.46555229274904, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.5411563657077756, |
|
"eval_loss": 0.641762912273407, |
|
"eval_runtime": 186.9231, |
|
"eval_samples_per_second": 37.566, |
|
"eval_steps_per_second": 0.588, |
|
"eval_wer": 0.4940140049695053, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.5696382796923953, |
|
"grad_norm": 5.877405643463135, |
|
"learning_rate": 0.00020016666666666666, |
|
"loss": 0.7953, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5696382796923953, |
|
"eval_loss": 0.6017736196517944, |
|
"eval_runtime": 182.787, |
|
"eval_samples_per_second": 38.416, |
|
"eval_steps_per_second": 0.602, |
|
"eval_wer": 0.4542313876081266, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5981201936770151, |
|
"eval_loss": 0.5962206721305847, |
|
"eval_runtime": 183.0007, |
|
"eval_samples_per_second": 38.371, |
|
"eval_steps_per_second": 0.601, |
|
"eval_wer": 0.4580315975497947, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.6266021076616348, |
|
"eval_loss": 0.5883399844169617, |
|
"eval_runtime": 182.7298, |
|
"eval_samples_per_second": 38.428, |
|
"eval_steps_per_second": 0.602, |
|
"eval_wer": 0.44590015812062345, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.6408430646539447, |
|
"grad_norm": 3.615546226501465, |
|
"learning_rate": 0.0001835333333333333, |
|
"loss": 0.7596, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6550840216462547, |
|
"eval_loss": 0.578825056552887, |
|
"eval_runtime": 183.3674, |
|
"eval_samples_per_second": 38.295, |
|
"eval_steps_per_second": 0.6, |
|
"eval_wer": 0.43253298608804264, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.6835659356308744, |
|
"eval_loss": 0.5708740949630737, |
|
"eval_runtime": 182.6951, |
|
"eval_samples_per_second": 38.436, |
|
"eval_steps_per_second": 0.602, |
|
"eval_wer": 0.4412362641013035, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7120478496154942, |
|
"grad_norm": 4.345168590545654, |
|
"learning_rate": 0.0001669, |
|
"loss": 0.7533, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7120478496154942, |
|
"eval_loss": 0.5594890117645264, |
|
"eval_runtime": 182.5857, |
|
"eval_samples_per_second": 38.459, |
|
"eval_steps_per_second": 0.602, |
|
"eval_wer": 0.4352170504524376, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7405297636001139, |
|
"eval_loss": 0.5545539259910583, |
|
"eval_runtime": 182.2233, |
|
"eval_samples_per_second": 38.535, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.4231786231547057, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.7690116775847337, |
|
"eval_loss": 0.5545418858528137, |
|
"eval_runtime": 182.2691, |
|
"eval_samples_per_second": 38.525, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.4244276432054638, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.7832526345770435, |
|
"grad_norm": 9.471431732177734, |
|
"learning_rate": 0.00015026666666666667, |
|
"loss": 0.7591, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7974935915693535, |
|
"eval_loss": 0.5442594885826111, |
|
"eval_runtime": 182.3947, |
|
"eval_samples_per_second": 38.499, |
|
"eval_steps_per_second": 0.603, |
|
"eval_wer": 0.4076455972043211, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.8259755055539733, |
|
"eval_loss": 0.5341240763664246, |
|
"eval_runtime": 182.0603, |
|
"eval_samples_per_second": 38.57, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.41462150706227824, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.854457419538593, |
|
"grad_norm": 4.406210422515869, |
|
"learning_rate": 0.00013363333333333332, |
|
"loss": 0.6621, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.854457419538593, |
|
"eval_loss": 0.5104002952575684, |
|
"eval_runtime": 181.8706, |
|
"eval_samples_per_second": 38.61, |
|
"eval_steps_per_second": 0.605, |
|
"eval_wer": 0.3955141577751498, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.8829393335232127, |
|
"eval_loss": 0.5139421820640564, |
|
"eval_runtime": 181.902, |
|
"eval_samples_per_second": 38.603, |
|
"eval_steps_per_second": 0.605, |
|
"eval_wer": 0.40112146055621256, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.9114212475078325, |
|
"eval_loss": 0.5044221878051758, |
|
"eval_runtime": 181.9538, |
|
"eval_samples_per_second": 38.592, |
|
"eval_steps_per_second": 0.605, |
|
"eval_wer": 0.38039304269256835, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.9256622045001424, |
|
"grad_norm": 8.09687328338623, |
|
"learning_rate": 0.000117, |
|
"loss": 0.6705, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.9399031614924523, |
|
"eval_loss": 0.49985769391059875, |
|
"eval_runtime": 182.1414, |
|
"eval_samples_per_second": 38.552, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.3896012437050718, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.9683850754770721, |
|
"eval_loss": 0.5097447037696838, |
|
"eval_runtime": 181.5418, |
|
"eval_samples_per_second": 38.68, |
|
"eval_steps_per_second": 0.606, |
|
"eval_wer": 0.4052804315762899, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.9968669894616918, |
|
"grad_norm": 4.639442443847656, |
|
"learning_rate": 0.00010033333333333332, |
|
"loss": 0.6665, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9968669894616918, |
|
"eval_loss": 0.49253013730049133, |
|
"eval_runtime": 181.6405, |
|
"eval_samples_per_second": 38.659, |
|
"eval_steps_per_second": 0.606, |
|
"eval_wer": 0.3784796502743858, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.0253489034463117, |
|
"eval_loss": 0.4896470010280609, |
|
"eval_runtime": 181.3934, |
|
"eval_samples_per_second": 38.711, |
|
"eval_steps_per_second": 0.606, |
|
"eval_wer": 0.3688728258414276, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.0538308174309314, |
|
"eval_loss": 0.47494611144065857, |
|
"eval_runtime": 181.7386, |
|
"eval_samples_per_second": 38.638, |
|
"eval_steps_per_second": 0.605, |
|
"eval_wer": 0.3687399513679427, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.0680717744232413, |
|
"grad_norm": 0.6623511910438538, |
|
"learning_rate": 8.366666666666666e-05, |
|
"loss": 0.5826, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.0823127314155512, |
|
"eval_loss": 0.4684299826622009, |
|
"eval_runtime": 182.4026, |
|
"eval_samples_per_second": 38.497, |
|
"eval_steps_per_second": 0.603, |
|
"eval_wer": 0.3628004624031677, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.110794645400171, |
|
"eval_loss": 0.47290024161338806, |
|
"eval_runtime": 182.1043, |
|
"eval_samples_per_second": 38.56, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.358495329462257, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.1392765593847907, |
|
"grad_norm": 2.393817186355591, |
|
"learning_rate": 6.699999999999999e-05, |
|
"loss": 0.5836, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.1392765593847907, |
|
"eval_loss": 0.46409761905670166, |
|
"eval_runtime": 181.7327, |
|
"eval_samples_per_second": 38.639, |
|
"eval_steps_per_second": 0.605, |
|
"eval_wer": 0.3553196295459679, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.1677584733694104, |
|
"eval_loss": 0.45749881863594055, |
|
"eval_runtime": 181.5866, |
|
"eval_samples_per_second": 38.67, |
|
"eval_steps_per_second": 0.606, |
|
"eval_wer": 0.3529810388126337, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.1962403873540302, |
|
"eval_loss": 0.45851147174835205, |
|
"eval_runtime": 181.5801, |
|
"eval_samples_per_second": 38.672, |
|
"eval_steps_per_second": 0.606, |
|
"eval_wer": 0.3485563188455866, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.21048134434634, |
|
"grad_norm": 1.9676859378814697, |
|
"learning_rate": 5.033333333333333e-05, |
|
"loss": 0.5199, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.22472230133865, |
|
"eval_loss": 0.4548875391483307, |
|
"eval_runtime": 182.6274, |
|
"eval_samples_per_second": 38.45, |
|
"eval_steps_per_second": 0.602, |
|
"eval_wer": 0.3450750076402822, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.2532042153232696, |
|
"eval_loss": 0.4520675539970398, |
|
"eval_runtime": 182.8881, |
|
"eval_samples_per_second": 38.395, |
|
"eval_steps_per_second": 0.601, |
|
"eval_wer": 0.34082302448876545, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.2816861293078894, |
|
"grad_norm": 1.1400251388549805, |
|
"learning_rate": 3.373333333333333e-05, |
|
"loss": 0.5268, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.2816861293078894, |
|
"eval_loss": 0.44252264499664307, |
|
"eval_runtime": 182.3349, |
|
"eval_samples_per_second": 38.512, |
|
"eval_steps_per_second": 0.603, |
|
"eval_wer": 0.33950756720126496, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.3101680432925091, |
|
"eval_loss": 0.44072064757347107, |
|
"eval_runtime": 184.1579, |
|
"eval_samples_per_second": 38.13, |
|
"eval_steps_per_second": 0.597, |
|
"eval_wer": 0.3361857053641425, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.338649957277129, |
|
"eval_loss": 0.4383063018321991, |
|
"eval_runtime": 181.6966, |
|
"eval_samples_per_second": 38.647, |
|
"eval_steps_per_second": 0.605, |
|
"eval_wer": 0.33397998910429316, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.352890914269439, |
|
"grad_norm": 1.0755033493041992, |
|
"learning_rate": 1.71e-05, |
|
"loss": 0.5013, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.3671318712617488, |
|
"eval_loss": 0.4356846809387207, |
|
"eval_runtime": 183.1225, |
|
"eval_samples_per_second": 38.346, |
|
"eval_steps_per_second": 0.601, |
|
"eval_wer": 0.33253165734330775, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.3956137852463686, |
|
"eval_loss": 0.43495818972587585, |
|
"eval_runtime": 182.2639, |
|
"eval_samples_per_second": 38.527, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.3316812607130044, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.4240956992309883, |
|
"grad_norm": 1.6312005519866943, |
|
"learning_rate": 4.666666666666666e-07, |
|
"loss": 0.5095, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.4240956992309883, |
|
"eval_loss": 0.43451622128486633, |
|
"eval_runtime": 182.2078, |
|
"eval_samples_per_second": 38.538, |
|
"eval_steps_per_second": 0.604, |
|
"eval_wer": 0.3308175766353526, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.4240956992309883, |
|
"step": 10000, |
|
"total_flos": 4.5974516642218747e+18, |
|
"train_loss": 0.7817989181518554, |
|
"train_runtime": 11412.7197, |
|
"train_samples_per_second": 3.505, |
|
"train_steps_per_second": 0.876 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.5974516642218747e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|