{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.861003861003861,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.019305019305019305,
      "grad_norm": 7.365023136138916,
      "learning_rate": 5.000000000000001e-07,
      "loss": 1.4936,
      "step": 25
    },
    {
      "epoch": 0.03861003861003861,
      "grad_norm": 5.869426727294922,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.2163,
      "step": 50
    },
    {
      "epoch": 0.05791505791505792,
      "grad_norm": 5.041717052459717,
      "learning_rate": 1.5e-06,
      "loss": 0.866,
      "step": 75
    },
    {
      "epoch": 0.07722007722007722,
      "grad_norm": 4.53203010559082,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.7562,
      "step": 100
    },
    {
      "epoch": 0.09652509652509653,
      "grad_norm": 5.75096321105957,
      "learning_rate": 2.5e-06,
      "loss": 0.6816,
      "step": 125
    },
    {
      "epoch": 0.11583011583011583,
      "grad_norm": 5.387002944946289,
      "learning_rate": 3e-06,
      "loss": 0.7012,
      "step": 150
    },
    {
      "epoch": 0.13513513513513514,
      "grad_norm": 5.036501884460449,
      "learning_rate": 3.5e-06,
      "loss": 0.6297,
      "step": 175
    },
    {
      "epoch": 0.15444015444015444,
      "grad_norm": 4.358458042144775,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.6081,
      "step": 200
    },
    {
      "epoch": 0.17374517374517376,
      "grad_norm": 4.636687278747559,
      "learning_rate": 4.5e-06,
      "loss": 0.6034,
      "step": 225
    },
    {
      "epoch": 0.19305019305019305,
      "grad_norm": 4.5222554206848145,
      "learning_rate": 5e-06,
      "loss": 0.5915,
      "step": 250
    },
    {
      "epoch": 0.21235521235521235,
      "grad_norm": 4.840209007263184,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.6055,
      "step": 275
    },
    {
      "epoch": 0.23166023166023167,
      "grad_norm": 4.3733673095703125,
      "learning_rate": 6e-06,
      "loss": 0.565,
      "step": 300
    },
    {
      "epoch": 0.25096525096525096,
      "grad_norm": 3.8710694313049316,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.5128,
      "step": 325
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 4.533177375793457,
      "learning_rate": 7e-06,
      "loss": 0.5532,
      "step": 350
    },
    {
      "epoch": 0.28957528957528955,
      "grad_norm": 4.626265048980713,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.5708,
      "step": 375
    },
    {
      "epoch": 0.3088803088803089,
      "grad_norm": 5.235887050628662,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.5413,
      "step": 400
    },
    {
      "epoch": 0.3281853281853282,
      "grad_norm": 4.232483386993408,
      "learning_rate": 8.5e-06,
      "loss": 0.5326,
      "step": 425
    },
    {
      "epoch": 0.3474903474903475,
      "grad_norm": 4.374448776245117,
      "learning_rate": 9e-06,
      "loss": 0.5205,
      "step": 450
    },
    {
      "epoch": 0.3667953667953668,
      "grad_norm": 4.335531711578369,
      "learning_rate": 9.5e-06,
      "loss": 0.5347,
      "step": 475
    },
    {
      "epoch": 0.3861003861003861,
      "grad_norm": 4.169550895690918,
      "learning_rate": 1e-05,
      "loss": 0.5148,
      "step": 500
    },
    {
      "epoch": 0.40540540540540543,
      "grad_norm": 3.5508618354797363,
      "learning_rate": 9.944444444444445e-06,
      "loss": 0.5227,
      "step": 525
    },
    {
      "epoch": 0.4247104247104247,
      "grad_norm": 3.7011241912841797,
      "learning_rate": 9.88888888888889e-06,
      "loss": 0.4989,
      "step": 550
    },
    {
      "epoch": 0.444015444015444,
      "grad_norm": 3.37355899810791,
      "learning_rate": 9.833333333333333e-06,
      "loss": 0.5067,
      "step": 575
    },
    {
      "epoch": 0.46332046332046334,
      "grad_norm": 4.071038246154785,
      "learning_rate": 9.777777777777779e-06,
      "loss": 0.4884,
      "step": 600
    },
    {
      "epoch": 0.4826254826254826,
      "grad_norm": 3.6045796871185303,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.4752,
      "step": 625
    },
    {
      "epoch": 0.5019305019305019,
      "grad_norm": 3.414005994796753,
      "learning_rate": 9.666666666666667e-06,
      "loss": 0.4784,
      "step": 650
    },
    {
      "epoch": 0.5212355212355212,
      "grad_norm": 3.203928232192993,
      "learning_rate": 9.611111111111112e-06,
      "loss": 0.4915,
      "step": 675
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 4.148367404937744,
      "learning_rate": 9.555555555555556e-06,
      "loss": 0.4861,
      "step": 700
    },
    {
      "epoch": 0.5598455598455598,
      "grad_norm": 3.9569835662841797,
      "learning_rate": 9.5e-06,
      "loss": 0.4744,
      "step": 725
    },
    {
      "epoch": 0.5791505791505791,
      "grad_norm": 3.682847261428833,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.4662,
      "step": 750
    },
    {
      "epoch": 0.5984555984555985,
      "grad_norm": 3.498784303665161,
      "learning_rate": 9.38888888888889e-06,
      "loss": 0.4479,
      "step": 775
    },
    {
      "epoch": 0.6177606177606177,
      "grad_norm": 3.3415608406066895,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.4422,
      "step": 800
    },
    {
      "epoch": 0.637065637065637,
      "grad_norm": 3.7531261444091797,
      "learning_rate": 9.277777777777778e-06,
      "loss": 0.4639,
      "step": 825
    },
    {
      "epoch": 0.6563706563706564,
      "grad_norm": 3.1370999813079834,
      "learning_rate": 9.222222222222224e-06,
      "loss": 0.4286,
      "step": 850
    },
    {
      "epoch": 0.6756756756756757,
      "grad_norm": 3.474857807159424,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.4461,
      "step": 875
    },
    {
      "epoch": 0.694980694980695,
      "grad_norm": 3.5445330142974854,
      "learning_rate": 9.111111111111112e-06,
      "loss": 0.4364,
      "step": 900
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 3.309083938598633,
      "learning_rate": 9.055555555555556e-06,
      "loss": 0.4274,
      "step": 925
    },
    {
      "epoch": 0.7335907335907336,
      "grad_norm": 3.169285297393799,
      "learning_rate": 9e-06,
      "loss": 0.4244,
      "step": 950
    },
    {
      "epoch": 0.752895752895753,
      "grad_norm": 3.5987651348114014,
      "learning_rate": 8.944444444444446e-06,
      "loss": 0.4189,
      "step": 975
    },
    {
      "epoch": 0.7722007722007722,
      "grad_norm": 3.622044563293457,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.4118,
      "step": 1000
    },
    {
      "epoch": 0.7722007722007722,
      "eval_loss": 0.48980528116226196,
      "eval_runtime": 2996.853,
      "eval_samples_per_second": 2.343,
      "eval_steps_per_second": 0.146,
      "eval_wer": 0.3626803079679568,
      "step": 1000
    },
    {
      "epoch": 0.7915057915057915,
      "grad_norm": 3.3155651092529297,
      "learning_rate": 8.833333333333334e-06,
      "loss": 0.4394,
      "step": 1025
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 2.6345057487487793,
      "learning_rate": 8.777777777777778e-06,
      "loss": 0.4143,
      "step": 1050
    },
    {
      "epoch": 0.8301158301158301,
      "grad_norm": 3.3825740814208984,
      "learning_rate": 8.722222222222224e-06,
      "loss": 0.399,
      "step": 1075
    },
    {
      "epoch": 0.8494208494208494,
      "grad_norm": 3.439831256866455,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.4269,
      "step": 1100
    },
    {
      "epoch": 0.8687258687258688,
      "grad_norm": 2.9175798892974854,
      "learning_rate": 8.611111111111112e-06,
      "loss": 0.46,
      "step": 1125
    },
    {
      "epoch": 0.888030888030888,
      "grad_norm": 3.6642494201660156,
      "learning_rate": 8.555555555555556e-06,
      "loss": 0.4038,
      "step": 1150
    },
    {
      "epoch": 0.9073359073359073,
      "grad_norm": 3.3490536212921143,
      "learning_rate": 8.5e-06,
      "loss": 0.3989,
      "step": 1175
    },
    {
      "epoch": 0.9266409266409267,
      "grad_norm": 3.563148021697998,
      "learning_rate": 8.444444444444446e-06,
      "loss": 0.4221,
      "step": 1200
    },
    {
      "epoch": 0.9459459459459459,
      "grad_norm": 3.2811503410339355,
      "learning_rate": 8.38888888888889e-06,
      "loss": 0.3959,
      "step": 1225
    },
    {
      "epoch": 0.9652509652509652,
      "grad_norm": 4.630711078643799,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.3992,
      "step": 1250
    },
    {
      "epoch": 0.9845559845559846,
      "grad_norm": 2.9398629665374756,
      "learning_rate": 8.277777777777778e-06,
      "loss": 0.4075,
      "step": 1275
    },
    {
      "epoch": 1.0038610038610039,
      "grad_norm": 2.4827306270599365,
      "learning_rate": 8.222222222222222e-06,
      "loss": 0.3593,
      "step": 1300
    },
    {
      "epoch": 1.0231660231660231,
      "grad_norm": 2.922198534011841,
      "learning_rate": 8.166666666666668e-06,
      "loss": 0.2864,
      "step": 1325
    },
    {
      "epoch": 1.0424710424710424,
      "grad_norm": 2.595409393310547,
      "learning_rate": 8.111111111111112e-06,
      "loss": 0.2928,
      "step": 1350
    },
    {
      "epoch": 1.0617760617760619,
      "grad_norm": 2.6854002475738525,
      "learning_rate": 8.055555555555557e-06,
      "loss": 0.2773,
      "step": 1375
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 2.5066304206848145,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.2864,
      "step": 1400
    },
    {
      "epoch": 1.1003861003861004,
      "grad_norm": 2.477524518966675,
      "learning_rate": 7.944444444444445e-06,
      "loss": 0.2815,
      "step": 1425
    },
    {
      "epoch": 1.1196911196911197,
      "grad_norm": 2.947479724884033,
      "learning_rate": 7.88888888888889e-06,
      "loss": 0.2814,
      "step": 1450
    },
    {
      "epoch": 1.138996138996139,
      "grad_norm": 2.497398853302002,
      "learning_rate": 7.833333333333333e-06,
      "loss": 0.27,
      "step": 1475
    },
    {
      "epoch": 1.1583011583011582,
      "grad_norm": 2.5605263710021973,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.2778,
      "step": 1500
    },
    {
      "epoch": 1.1776061776061777,
      "grad_norm": 2.8643362522125244,
      "learning_rate": 7.722222222222223e-06,
      "loss": 0.2625,
      "step": 1525
    },
    {
      "epoch": 1.196911196911197,
      "grad_norm": 2.8837692737579346,
      "learning_rate": 7.666666666666667e-06,
      "loss": 0.2732,
      "step": 1550
    },
    {
      "epoch": 1.2162162162162162,
      "grad_norm": 2.499385118484497,
      "learning_rate": 7.611111111111111e-06,
      "loss": 0.2898,
      "step": 1575
    },
    {
      "epoch": 1.2355212355212355,
      "grad_norm": 3.035726547241211,
      "learning_rate": 7.555555555555556e-06,
      "loss": 0.279,
      "step": 1600
    },
    {
      "epoch": 1.2548262548262548,
      "grad_norm": 2.872089385986328,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.3005,
      "step": 1625
    },
    {
      "epoch": 1.2741312741312742,
      "grad_norm": 2.8907766342163086,
      "learning_rate": 7.444444444444445e-06,
      "loss": 0.2941,
      "step": 1650
    },
    {
      "epoch": 1.2934362934362935,
      "grad_norm": 2.8525350093841553,
      "learning_rate": 7.38888888888889e-06,
      "loss": 0.2644,
      "step": 1675
    },
    {
      "epoch": 1.3127413127413128,
      "grad_norm": 2.7391347885131836,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.2811,
      "step": 1700
    },
    {
      "epoch": 1.332046332046332,
      "grad_norm": 2.4943618774414062,
      "learning_rate": 7.277777777777778e-06,
      "loss": 0.2786,
      "step": 1725
    },
    {
      "epoch": 1.3513513513513513,
      "grad_norm": 3.009016752243042,
      "learning_rate": 7.222222222222223e-06,
      "loss": 0.2928,
      "step": 1750
    },
    {
      "epoch": 1.3706563706563706,
      "grad_norm": 3.0978188514709473,
      "learning_rate": 7.166666666666667e-06,
      "loss": 0.298,
      "step": 1775
    },
    {
      "epoch": 1.3899613899613898,
      "grad_norm": 2.746687412261963,
      "learning_rate": 7.111111111111112e-06,
      "loss": 0.2848,
      "step": 1800
    },
    {
      "epoch": 1.4092664092664093,
      "grad_norm": 2.6545677185058594,
      "learning_rate": 7.055555555555557e-06,
      "loss": 0.2755,
      "step": 1825
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 3.196070671081543,
      "learning_rate": 7e-06,
      "loss": 0.2733,
      "step": 1850
    },
    {
      "epoch": 1.4478764478764479,
      "grad_norm": 2.5609796047210693,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.2659,
      "step": 1875
    },
    {
      "epoch": 1.4671814671814671,
      "grad_norm": 2.339729070663452,
      "learning_rate": 6.88888888888889e-06,
      "loss": 0.2924,
      "step": 1900
    },
    {
      "epoch": 1.4864864864864864,
      "grad_norm": 2.7920401096343994,
      "learning_rate": 6.833333333333334e-06,
      "loss": 0.2628,
      "step": 1925
    },
    {
      "epoch": 1.505791505791506,
      "grad_norm": 2.2347285747528076,
      "learning_rate": 6.777777777777779e-06,
      "loss": 0.2816,
      "step": 1950
    },
    {
      "epoch": 1.525096525096525,
      "grad_norm": 2.6325347423553467,
      "learning_rate": 6.7222222222222235e-06,
      "loss": 0.2664,
      "step": 1975
    },
    {
      "epoch": 1.5444015444015444,
      "grad_norm": 3.093266248703003,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.2629,
      "step": 2000
    },
    {
      "epoch": 1.5444015444015444,
      "eval_loss": 0.4272039532661438,
      "eval_runtime": 2939.2445,
      "eval_samples_per_second": 2.389,
      "eval_steps_per_second": 0.149,
      "eval_wer": 0.3144726762830916,
      "step": 2000
    },
    {
      "epoch": 1.5637065637065637,
      "grad_norm": 2.7231810092926025,
      "learning_rate": 6.6111111111111115e-06,
      "loss": 0.2671,
      "step": 2025
    },
    {
      "epoch": 1.583011583011583,
      "grad_norm": 2.4896557331085205,
      "learning_rate": 6.555555555555556e-06,
      "loss": 0.291,
      "step": 2050
    },
    {
      "epoch": 1.6023166023166024,
      "grad_norm": 2.549482583999634,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.2697,
      "step": 2075
    },
    {
      "epoch": 1.6216216216216215,
      "grad_norm": 3.055518627166748,
      "learning_rate": 6.444444444444445e-06,
      "loss": 0.2612,
      "step": 2100
    },
    {
      "epoch": 1.640926640926641,
      "grad_norm": 2.8023324012756348,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 0.2612,
      "step": 2125
    },
    {
      "epoch": 1.6602316602316602,
      "grad_norm": 2.5764496326446533,
      "learning_rate": 6.333333333333333e-06,
      "loss": 0.2718,
      "step": 2150
    },
    {
      "epoch": 1.6795366795366795,
      "grad_norm": 3.0034117698669434,
      "learning_rate": 6.277777777777778e-06,
      "loss": 0.2836,
      "step": 2175
    },
    {
      "epoch": 1.698841698841699,
      "grad_norm": 3.285400629043579,
      "learning_rate": 6.222222222222223e-06,
      "loss": 0.267,
      "step": 2200
    },
    {
      "epoch": 1.718146718146718,
      "grad_norm": 2.8904147148132324,
      "learning_rate": 6.166666666666667e-06,
      "loss": 0.2844,
      "step": 2225
    },
    {
      "epoch": 1.7374517374517375,
      "grad_norm": 2.7520534992218018,
      "learning_rate": 6.111111111111112e-06,
      "loss": 0.2641,
      "step": 2250
    },
    {
      "epoch": 1.7567567567567568,
      "grad_norm": 2.262890338897705,
      "learning_rate": 6.055555555555555e-06,
      "loss": 0.2636,
      "step": 2275
    },
    {
      "epoch": 1.776061776061776,
      "grad_norm": 2.410085439682007,
      "learning_rate": 6e-06,
      "loss": 0.2753,
      "step": 2300
    },
    {
      "epoch": 1.7953667953667953,
      "grad_norm": 2.5720226764678955,
      "learning_rate": 5.944444444444445e-06,
      "loss": 0.2702,
      "step": 2325
    },
    {
      "epoch": 1.8146718146718146,
      "grad_norm": 2.999687433242798,
      "learning_rate": 5.88888888888889e-06,
      "loss": 0.2998,
      "step": 2350
    },
    {
      "epoch": 1.833976833976834,
      "grad_norm": 2.5406405925750732,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.2444,
      "step": 2375
    },
    {
      "epoch": 1.8532818532818531,
      "grad_norm": 2.314110517501831,
      "learning_rate": 5.777777777777778e-06,
      "loss": 0.2664,
      "step": 2400
    },
    {
      "epoch": 1.8725868725868726,
      "grad_norm": 2.745534896850586,
      "learning_rate": 5.722222222222222e-06,
      "loss": 0.2676,
      "step": 2425
    },
    {
      "epoch": 1.8918918918918919,
      "grad_norm": 3.1628456115722656,
      "learning_rate": 5.666666666666667e-06,
      "loss": 0.2475,
      "step": 2450
    },
    {
      "epoch": 1.9111969111969112,
      "grad_norm": 2.6415674686431885,
      "learning_rate": 5.611111111111112e-06,
      "loss": 0.2565,
      "step": 2475
    },
    {
      "epoch": 1.9305019305019306,
      "grad_norm": 2.4771909713745117,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.2621,
      "step": 2500
    },
    {
      "epoch": 1.9498069498069497,
      "grad_norm": 2.7251639366149902,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.2703,
      "step": 2525
    },
    {
      "epoch": 1.9691119691119692,
      "grad_norm": 2.73382830619812,
      "learning_rate": 5.444444444444445e-06,
      "loss": 0.2728,
      "step": 2550
    },
    {
      "epoch": 1.9884169884169884,
      "grad_norm": 3.0017306804656982,
      "learning_rate": 5.388888888888889e-06,
      "loss": 0.2521,
      "step": 2575
    },
    {
      "epoch": 2.0077220077220077,
      "grad_norm": 1.9712167978286743,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.2352,
      "step": 2600
    },
    {
      "epoch": 2.027027027027027,
      "grad_norm": 1.8378448486328125,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 0.1696,
      "step": 2625
    },
    {
      "epoch": 2.0463320463320462,
      "grad_norm": 2.419578790664673,
      "learning_rate": 5.2222222222222226e-06,
      "loss": 0.1592,
      "step": 2650
    },
    {
      "epoch": 2.0656370656370657,
      "grad_norm": 2.145256757736206,
      "learning_rate": 5.1666666666666675e-06,
      "loss": 0.1719,
      "step": 2675
    },
    {
      "epoch": 2.0849420849420848,
      "grad_norm": 2.147104024887085,
      "learning_rate": 5.1111111111111115e-06,
      "loss": 0.1487,
      "step": 2700
    },
    {
      "epoch": 2.1042471042471043,
      "grad_norm": 2.280949592590332,
      "learning_rate": 5.0555555555555555e-06,
      "loss": 0.1785,
      "step": 2725
    },
    {
      "epoch": 2.1235521235521237,
      "grad_norm": 2.359393835067749,
      "learning_rate": 5e-06,
      "loss": 0.1656,
      "step": 2750
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 2.4849932193756104,
      "learning_rate": 4.944444444444445e-06,
      "loss": 0.1697,
      "step": 2775
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 2.478525400161743,
      "learning_rate": 4.888888888888889e-06,
      "loss": 0.1586,
      "step": 2800
    },
    {
      "epoch": 2.1814671814671813,
      "grad_norm": 2.258152723312378,
      "learning_rate": 4.833333333333333e-06,
      "loss": 0.1677,
      "step": 2825
    },
    {
      "epoch": 2.200772200772201,
      "grad_norm": 2.2826361656188965,
      "learning_rate": 4.777777777777778e-06,
      "loss": 0.1603,
      "step": 2850
    },
    {
      "epoch": 2.2200772200772203,
      "grad_norm": 2.3771138191223145,
      "learning_rate": 4.722222222222222e-06,
      "loss": 0.159,
      "step": 2875
    },
    {
      "epoch": 2.2393822393822393,
      "grad_norm": 2.353187322616577,
      "learning_rate": 4.666666666666667e-06,
      "loss": 0.1617,
      "step": 2900
    },
    {
      "epoch": 2.258687258687259,
      "grad_norm": 2.2030959129333496,
      "learning_rate": 4.611111111111112e-06,
      "loss": 0.17,
      "step": 2925
    },
    {
      "epoch": 2.277992277992278,
      "grad_norm": 2.1587460041046143,
      "learning_rate": 4.555555555555556e-06,
      "loss": 0.1618,
      "step": 2950
    },
    {
      "epoch": 2.2972972972972974,
      "grad_norm": 2.918156623840332,
      "learning_rate": 4.5e-06,
      "loss": 0.1545,
      "step": 2975
    },
    {
      "epoch": 2.3166023166023164,
      "grad_norm": 2.4041364192962646,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.1677,
      "step": 3000
    },
    {
      "epoch": 2.3166023166023164,
      "eval_loss": 0.42425960302352905,
      "eval_runtime": 2930.947,
      "eval_samples_per_second": 2.396,
      "eval_steps_per_second": 0.15,
      "eval_wer": 0.3038651796263357,
      "step": 3000
    },
    {
      "epoch": 2.335907335907336,
      "grad_norm": 1.8374619483947754,
      "learning_rate": 4.388888888888889e-06,
      "loss": 0.1505,
      "step": 3025
    },
    {
      "epoch": 2.3552123552123554,
      "grad_norm": 2.2818222045898438,
      "learning_rate": 4.333333333333334e-06,
      "loss": 0.1635,
      "step": 3050
    },
    {
      "epoch": 2.3745173745173744,
      "grad_norm": 2.3476946353912354,
      "learning_rate": 4.277777777777778e-06,
      "loss": 0.1572,
      "step": 3075
    },
    {
      "epoch": 2.393822393822394,
      "grad_norm": 2.727679491043091,
      "learning_rate": 4.222222222222223e-06,
      "loss": 0.1567,
      "step": 3100
    },
    {
      "epoch": 2.413127413127413,
      "grad_norm": 2.148946523666382,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.1629,
      "step": 3125
    },
    {
      "epoch": 2.4324324324324325,
      "grad_norm": 2.3598289489746094,
      "learning_rate": 4.111111111111111e-06,
      "loss": 0.1537,
      "step": 3150
    },
    {
      "epoch": 2.4517374517374515,
      "grad_norm": 2.627530813217163,
      "learning_rate": 4.055555555555556e-06,
      "loss": 0.1617,
      "step": 3175
    },
    {
      "epoch": 2.471042471042471,
      "grad_norm": 2.1012682914733887,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.1541,
      "step": 3200
    },
    {
      "epoch": 2.4903474903474905,
      "grad_norm": 2.719054937362671,
      "learning_rate": 3.944444444444445e-06,
      "loss": 0.1479,
      "step": 3225
    },
    {
      "epoch": 2.5096525096525095,
      "grad_norm": 1.9170578718185425,
      "learning_rate": 3.88888888888889e-06,
      "loss": 0.1558,
      "step": 3250
    },
    {
      "epoch": 2.528957528957529,
      "grad_norm": 2.209770917892456,
      "learning_rate": 3.833333333333334e-06,
      "loss": 0.1494,
      "step": 3275
    },
    {
      "epoch": 2.5482625482625485,
      "grad_norm": 2.3125557899475098,
      "learning_rate": 3.777777777777778e-06,
      "loss": 0.1549,
      "step": 3300
    },
    {
      "epoch": 2.5675675675675675,
      "grad_norm": 2.2932839393615723,
      "learning_rate": 3.7222222222222225e-06,
      "loss": 0.1529,
      "step": 3325
    },
    {
      "epoch": 2.586872586872587,
      "grad_norm": 2.347710371017456,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 0.1637,
      "step": 3350
    },
    {
      "epoch": 2.606177606177606,
      "grad_norm": 2.479405641555786,
      "learning_rate": 3.6111111111111115e-06,
      "loss": 0.162,
      "step": 3375
    },
    {
      "epoch": 2.6254826254826256,
      "grad_norm": 2.7078042030334473,
      "learning_rate": 3.555555555555556e-06,
      "loss": 0.1606,
      "step": 3400
    },
    {
      "epoch": 2.6447876447876446,
      "grad_norm": 2.097592830657959,
      "learning_rate": 3.5e-06,
      "loss": 0.1554,
      "step": 3425
    },
    {
      "epoch": 2.664092664092664,
      "grad_norm": 2.405099630355835,
      "learning_rate": 3.444444444444445e-06,
      "loss": 0.1611,
      "step": 3450
    },
    {
      "epoch": 2.6833976833976836,
      "grad_norm": 2.120976686477661,
      "learning_rate": 3.3888888888888893e-06,
      "loss": 0.1537,
      "step": 3475
    },
    {
      "epoch": 2.7027027027027026,
      "grad_norm": 2.165222406387329,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.156,
      "step": 3500
    },
    {
      "epoch": 2.722007722007722,
      "grad_norm": 2.3541574478149414,
      "learning_rate": 3.277777777777778e-06,
      "loss": 0.1571,
      "step": 3525
    },
    {
      "epoch": 2.741312741312741,
      "grad_norm": 2.5069708824157715,
      "learning_rate": 3.2222222222222227e-06,
      "loss": 0.1467,
      "step": 3550
    },
    {
      "epoch": 2.7606177606177607,
      "grad_norm": 1.9154902696609497,
      "learning_rate": 3.1666666666666667e-06,
      "loss": 0.1556,
      "step": 3575
    },
    {
      "epoch": 2.7799227799227797,
      "grad_norm": 1.964211106300354,
      "learning_rate": 3.1111111111111116e-06,
      "loss": 0.1534,
      "step": 3600
    },
    {
      "epoch": 2.799227799227799,
      "grad_norm": 2.3793370723724365,
      "learning_rate": 3.055555555555556e-06,
      "loss": 0.1578,
      "step": 3625
    },
    {
      "epoch": 2.8185328185328187,
      "grad_norm": 2.7694272994995117,
      "learning_rate": 3e-06,
      "loss": 0.1518,
      "step": 3650
    },
    {
      "epoch": 2.8378378378378377,
      "grad_norm": 2.1063687801361084,
      "learning_rate": 2.944444444444445e-06,
      "loss": 0.1534,
      "step": 3675
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 2.708767890930176,
      "learning_rate": 2.888888888888889e-06,
      "loss": 0.153,
      "step": 3700
    },
    {
      "epoch": 2.8764478764478767,
      "grad_norm": 1.7426552772521973,
      "learning_rate": 2.8333333333333335e-06,
      "loss": 0.1501,
      "step": 3725
    },
    {
      "epoch": 2.8957528957528957,
      "grad_norm": 2.721165180206299,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.1613,
      "step": 3750
    },
    {
      "epoch": 2.915057915057915,
      "grad_norm": 2.525545358657837,
      "learning_rate": 2.7222222222222224e-06,
      "loss": 0.1534,
      "step": 3775
    },
    {
      "epoch": 2.9343629343629343,
      "grad_norm": 2.6334362030029297,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.158,
      "step": 3800
    },
    {
      "epoch": 2.9536679536679538,
      "grad_norm": 2.1491916179656982,
      "learning_rate": 2.6111111111111113e-06,
      "loss": 0.15,
      "step": 3825
    },
    {
      "epoch": 2.972972972972973,
      "grad_norm": 2.0909786224365234,
      "learning_rate": 2.5555555555555557e-06,
      "loss": 0.147,
      "step": 3850
    },
    {
      "epoch": 2.9922779922779923,
      "grad_norm": 2.8027398586273193,
      "learning_rate": 2.5e-06,
      "loss": 0.1642,
      "step": 3875
    },
    {
      "epoch": 3.011583011583012,
      "grad_norm": 1.8371341228485107,
      "learning_rate": 2.4444444444444447e-06,
      "loss": 0.1176,
      "step": 3900
    },
    {
      "epoch": 3.030888030888031,
      "grad_norm": 2.1273913383483887,
      "learning_rate": 2.388888888888889e-06,
      "loss": 0.0937,
      "step": 3925
    },
    {
      "epoch": 3.0501930501930503,
      "grad_norm": 1.8209782838821411,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 0.089,
      "step": 3950
    },
    {
      "epoch": 3.0694980694980694,
      "grad_norm": 2.118212938308716,
      "learning_rate": 2.277777777777778e-06,
      "loss": 0.0888,
      "step": 3975
    },
    {
      "epoch": 3.088803088803089,
      "grad_norm": 1.675663709640503,
      "learning_rate": 2.222222222222222e-06,
      "loss": 0.0841,
      "step": 4000
    },
    {
      "epoch": 3.088803088803089,
      "eval_loss": 0.4492855668067932,
      "eval_runtime": 2954.4178,
      "eval_samples_per_second": 2.377,
      "eval_steps_per_second": 0.149,
      "eval_wer": 0.2968411212526454,
      "step": 4000
    },
    {
      "epoch": 3.108108108108108,
      "grad_norm": 2.0060861110687256,
      "learning_rate": 2.166666666666667e-06,
      "loss": 0.084,
      "step": 4025
    },
    {
      "epoch": 3.1274131274131274,
      "grad_norm": 1.8335351943969727,
      "learning_rate": 2.1111111111111114e-06,
      "loss": 0.0871,
      "step": 4050
    },
    {
      "epoch": 3.146718146718147,
      "grad_norm": 2.165919303894043,
      "learning_rate": 2.0555555555555555e-06,
      "loss": 0.0892,
      "step": 4075
    },
    {
      "epoch": 3.166023166023166,
      "grad_norm": 1.5656211376190186,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.0869,
      "step": 4100
    },
    {
      "epoch": 3.1853281853281854,
      "grad_norm": 1.9687210321426392,
      "learning_rate": 1.944444444444445e-06,
      "loss": 0.0878,
      "step": 4125
    },
    {
      "epoch": 3.2046332046332044,
      "grad_norm": 2.017395496368408,
      "learning_rate": 1.888888888888889e-06,
      "loss": 0.0823,
      "step": 4150
    },
    {
      "epoch": 3.223938223938224,
      "grad_norm": 2.3069615364074707,
      "learning_rate": 1.8333333333333333e-06,
      "loss": 0.0894,
      "step": 4175
    },
    {
      "epoch": 3.2432432432432434,
      "grad_norm": 1.8950186967849731,
      "learning_rate": 1.777777777777778e-06,
      "loss": 0.0889,
      "step": 4200
    },
    {
      "epoch": 3.2625482625482625,
      "grad_norm": 1.9198052883148193,
      "learning_rate": 1.7222222222222224e-06,
      "loss": 0.081,
      "step": 4225
    },
    {
      "epoch": 3.281853281853282,
      "grad_norm": 2.25732421875,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.0872,
      "step": 4250
    },
    {
      "epoch": 3.301158301158301,
      "grad_norm": 1.6695570945739746,
      "learning_rate": 1.6111111111111113e-06,
      "loss": 0.0827,
      "step": 4275
    },
    {
      "epoch": 3.3204633204633205,
      "grad_norm": 2.1860039234161377,
      "learning_rate": 1.5555555555555558e-06,
      "loss": 0.0864,
      "step": 4300
    },
    {
      "epoch": 3.33976833976834,
      "grad_norm": 2.0618574619293213,
      "learning_rate": 1.5e-06,
      "loss": 0.0843,
      "step": 4325
    },
    {
      "epoch": 3.359073359073359,
      "grad_norm": 1.952563762664795,
      "learning_rate": 1.4444444444444445e-06,
      "loss": 0.0822,
      "step": 4350
    },
    {
      "epoch": 3.3783783783783785,
      "grad_norm": 2.2584211826324463,
      "learning_rate": 1.3888888888888892e-06,
      "loss": 0.087,
      "step": 4375
    },
    {
      "epoch": 3.3976833976833976,
      "grad_norm": 1.7842656373977661,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.0856,
      "step": 4400
    },
    {
      "epoch": 3.416988416988417,
      "grad_norm": 2.130709409713745,
      "learning_rate": 1.2777777777777779e-06,
      "loss": 0.0886,
      "step": 4425
    },
    {
      "epoch": 3.436293436293436,
      "grad_norm": 1.505071759223938,
      "learning_rate": 1.2222222222222223e-06,
      "loss": 0.0795,
      "step": 4450
    },
    {
      "epoch": 3.4555984555984556,
      "grad_norm": 2.2736964225769043,
      "learning_rate": 1.1666666666666668e-06,
      "loss": 0.0802,
      "step": 4475
    },
    {
      "epoch": 3.474903474903475,
      "grad_norm": 1.685120940208435,
      "learning_rate": 1.111111111111111e-06,
      "loss": 0.085,
      "step": 4500
    },
    {
      "epoch": 3.494208494208494,
      "grad_norm": 1.8761180639266968,
      "learning_rate": 1.0555555555555557e-06,
      "loss": 0.0732,
      "step": 4525
    },
    {
      "epoch": 3.5135135135135136,
      "grad_norm": 1.7968302965164185,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.0868,
      "step": 4550
    },
    {
      "epoch": 3.532818532818533,
      "grad_norm": 1.6335569620132446,
      "learning_rate": 9.444444444444445e-07,
      "loss": 0.0787,
      "step": 4575
    },
    {
      "epoch": 3.552123552123552,
      "grad_norm": 1.402917742729187,
      "learning_rate": 8.88888888888889e-07,
      "loss": 0.0779,
      "step": 4600
    },
    {
      "epoch": 3.571428571428571,
      "grad_norm": 1.4878441095352173,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.0795,
      "step": 4625
    },
    {
      "epoch": 3.5907335907335907,
      "grad_norm": 1.846757411956787,
      "learning_rate": 7.777777777777779e-07,
      "loss": 0.0868,
      "step": 4650
    },
    {
      "epoch": 3.61003861003861,
      "grad_norm": 1.496010422706604,
      "learning_rate": 7.222222222222222e-07,
      "loss": 0.0836,
      "step": 4675
    },
    {
      "epoch": 3.629343629343629,
      "grad_norm": 2.0897486209869385,
      "learning_rate": 6.666666666666667e-07,
      "loss": 0.0839,
      "step": 4700
    },
    {
      "epoch": 3.6486486486486487,
      "grad_norm": 1.6161247491836548,
      "learning_rate": 6.111111111111112e-07,
      "loss": 0.0858,
      "step": 4725
    },
    {
      "epoch": 3.667953667953668,
      "grad_norm": 1.9887114763259888,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.0864,
      "step": 4750
    },
    {
      "epoch": 3.687258687258687,
      "grad_norm": 2.271867513656616,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.0815,
      "step": 4775
    },
    {
      "epoch": 3.7065637065637067,
      "grad_norm": 1.9788146018981934,
      "learning_rate": 4.444444444444445e-07,
      "loss": 0.0821,
      "step": 4800
    },
    {
      "epoch": 3.7258687258687258,
      "grad_norm": 1.8956130743026733,
      "learning_rate": 3.8888888888888895e-07,
      "loss": 0.0789,
      "step": 4825
    },
    {
      "epoch": 3.7451737451737452,
      "grad_norm": 2.4207746982574463,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 0.1009,
      "step": 4850
    },
    {
      "epoch": 3.7644787644787643,
      "grad_norm": 1.7667814493179321,
      "learning_rate": 2.7777777777777776e-07,
      "loss": 0.0836,
      "step": 4875
    },
    {
      "epoch": 3.7837837837837838,
      "grad_norm": 2.118232488632202,
      "learning_rate": 2.2222222222222224e-07,
      "loss": 0.0824,
      "step": 4900
    },
    {
      "epoch": 3.8030888030888033,
      "grad_norm": 1.5285857915878296,
      "learning_rate": 1.6666666666666668e-07,
      "loss": 0.0836,
      "step": 4925
    },
    {
      "epoch": 3.8223938223938223,
      "grad_norm": 2.041189670562744,
      "learning_rate": 1.1111111111111112e-07,
      "loss": 0.0838,
      "step": 4950
    },
    {
      "epoch": 3.841698841698842,
      "grad_norm": 2.062230110168457,
      "learning_rate": 5.555555555555556e-08,
      "loss": 0.0804,
      "step": 4975
    },
    {
      "epoch": 3.861003861003861,
      "grad_norm": 1.9036461114883423,
      "learning_rate": 0.0,
      "loss": 0.0845,
      "step": 5000
    },
    {
      "epoch": 3.861003861003861,
      "eval_loss": 0.447899729013443,
      "eval_runtime": 2978.0577,
      "eval_samples_per_second": 2.358,
      "eval_steps_per_second": 0.147,
      "eval_wer": 0.29472481531011024,
      "step": 5000
    },
    {
      "epoch": 3.861003861003861,
      "step": 5000,
      "total_flos": 5.435487665750016e+20,
      "train_loss": 0.2695653033494949,
      "train_runtime": 64824.1008,
      "train_samples_per_second": 2.468,
      "train_steps_per_second": 0.077
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.435487665750016e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}