{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.861003861003861,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.019305019305019305,
      "grad_norm": 7.541379928588867,
      "learning_rate": 5.000000000000001e-07,
      "loss": 1.5011,
      "step": 25
    },
    {
      "epoch": 0.03861003861003861,
      "grad_norm": 5.699951648712158,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.1952,
      "step": 50
    },
    {
      "epoch": 0.05791505791505792,
      "grad_norm": 4.949509143829346,
      "learning_rate": 1.5e-06,
      "loss": 0.8322,
      "step": 75
    },
    {
      "epoch": 0.07722007722007722,
      "grad_norm": 4.566027641296387,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.7302,
      "step": 100
    },
    {
      "epoch": 0.09652509652509653,
      "grad_norm": 5.739590167999268,
      "learning_rate": 2.5e-06,
      "loss": 0.658,
      "step": 125
    },
    {
      "epoch": 0.11583011583011583,
      "grad_norm": 5.6211347579956055,
      "learning_rate": 3e-06,
      "loss": 0.6784,
      "step": 150
    },
    {
      "epoch": 0.13513513513513514,
      "grad_norm": 5.0315046310424805,
      "learning_rate": 3.5e-06,
      "loss": 0.6025,
      "step": 175
    },
    {
      "epoch": 0.15444015444015444,
      "grad_norm": 4.241130828857422,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.5837,
      "step": 200
    },
    {
      "epoch": 0.17374517374517376,
      "grad_norm": 4.572744369506836,
      "learning_rate": 4.5e-06,
      "loss": 0.5771,
      "step": 225
    },
    {
      "epoch": 0.19305019305019305,
      "grad_norm": 4.686110496520996,
      "learning_rate": 5e-06,
      "loss": 0.5647,
      "step": 250
    },
    {
      "epoch": 0.21235521235521235,
      "grad_norm": 4.762725353240967,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.5817,
      "step": 275
    },
    {
      "epoch": 0.23166023166023167,
      "grad_norm": 4.6283278465271,
      "learning_rate": 6e-06,
      "loss": 0.5409,
      "step": 300
    },
    {
      "epoch": 0.25096525096525096,
      "grad_norm": 3.748809576034546,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.4916,
      "step": 325
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 4.640991687774658,
      "learning_rate": 7e-06,
      "loss": 0.5268,
      "step": 350
    },
    {
      "epoch": 0.28957528957528955,
      "grad_norm": 4.591019630432129,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.5422,
      "step": 375
    },
    {
      "epoch": 0.3088803088803089,
      "grad_norm": 5.206230640411377,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.5214,
      "step": 400
    },
    {
      "epoch": 0.3281853281853282,
      "grad_norm": 4.378481388092041,
      "learning_rate": 8.5e-06,
      "loss": 0.5085,
      "step": 425
    },
    {
      "epoch": 0.3474903474903475,
      "grad_norm": 4.319910526275635,
      "learning_rate": 9e-06,
      "loss": 0.4955,
      "step": 450
    },
    {
      "epoch": 0.3667953667953668,
      "grad_norm": 4.216291427612305,
      "learning_rate": 9.5e-06,
      "loss": 0.5112,
      "step": 475
    },
    {
      "epoch": 0.3861003861003861,
      "grad_norm": 4.130216121673584,
      "learning_rate": 1e-05,
      "loss": 0.4917,
      "step": 500
    },
    {
      "epoch": 0.40540540540540543,
      "grad_norm": 3.5388808250427246,
      "learning_rate": 9.944444444444445e-06,
      "loss": 0.4995,
      "step": 525
    },
    {
      "epoch": 0.4247104247104247,
      "grad_norm": 3.57562255859375,
      "learning_rate": 9.88888888888889e-06,
      "loss": 0.4747,
      "step": 550
    },
    {
      "epoch": 0.444015444015444,
      "grad_norm": 3.1933300495147705,
      "learning_rate": 9.833333333333333e-06,
      "loss": 0.4789,
      "step": 575
    },
    {
      "epoch": 0.46332046332046334,
      "grad_norm": 3.9434165954589844,
      "learning_rate": 9.777777777777779e-06,
      "loss": 0.4642,
      "step": 600
    },
    {
      "epoch": 0.4826254826254826,
      "grad_norm": 3.4227051734924316,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.4493,
      "step": 625
    },
    {
      "epoch": 0.5019305019305019,
      "grad_norm": 3.3387601375579834,
      "learning_rate": 9.666666666666667e-06,
      "loss": 0.4561,
      "step": 650
    },
    {
      "epoch": 0.5212355212355212,
      "grad_norm": 3.66536021232605,
      "learning_rate": 9.611111111111112e-06,
      "loss": 0.4703,
      "step": 675
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 3.9533724784851074,
      "learning_rate": 9.555555555555556e-06,
      "loss": 0.4602,
      "step": 700
    },
    {
      "epoch": 0.5598455598455598,
      "grad_norm": 3.853438377380371,
      "learning_rate": 9.5e-06,
      "loss": 0.4524,
      "step": 725
    },
    {
      "epoch": 0.5791505791505791,
      "grad_norm": 3.8207361698150635,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.4422,
      "step": 750
    },
    {
      "epoch": 0.5984555984555985,
      "grad_norm": 3.5014588832855225,
      "learning_rate": 9.38888888888889e-06,
      "loss": 0.4261,
      "step": 775
    },
    {
      "epoch": 0.6177606177606177,
      "grad_norm": 3.4164435863494873,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.4244,
      "step": 800
    },
    {
      "epoch": 0.637065637065637,
      "grad_norm": 3.5216405391693115,
      "learning_rate": 9.277777777777778e-06,
      "loss": 0.4406,
      "step": 825
    },
    {
      "epoch": 0.6563706563706564,
      "grad_norm": 3.0401480197906494,
      "learning_rate": 9.222222222222224e-06,
      "loss": 0.4047,
      "step": 850
    },
    {
      "epoch": 0.6756756756756757,
      "grad_norm": 3.4229395389556885,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.4205,
      "step": 875
    },
    {
      "epoch": 0.694980694980695,
      "grad_norm": 3.6540348529815674,
      "learning_rate": 9.111111111111112e-06,
      "loss": 0.4143,
      "step": 900
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 3.199246883392334,
      "learning_rate": 9.055555555555556e-06,
      "loss": 0.4062,
      "step": 925
    },
    {
      "epoch": 0.7335907335907336,
      "grad_norm": 3.220662832260132,
      "learning_rate": 9e-06,
      "loss": 0.405,
      "step": 950
    },
    {
      "epoch": 0.752895752895753,
      "grad_norm": 3.5012640953063965,
      "learning_rate": 8.944444444444446e-06,
      "loss": 0.3942,
      "step": 975
    },
    {
      "epoch": 0.7722007722007722,
      "grad_norm": 3.518545150756836,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.3936,
      "step": 1000
    },
    {
      "epoch": 0.7722007722007722,
      "eval_loss": 0.45976704359054565,
      "eval_runtime": 2976.7472,
      "eval_samples_per_second": 2.359,
      "eval_steps_per_second": 0.147,
      "eval_wer": 0.34102477535968423,
      "step": 1000
    },
    {
      "epoch": 0.7915057915057915,
      "grad_norm": 3.2509634494781494,
      "learning_rate": 8.833333333333334e-06,
      "loss": 0.4169,
      "step": 1025
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 2.5278899669647217,
      "learning_rate": 8.777777777777778e-06,
      "loss": 0.3895,
      "step": 1050
    },
    {
      "epoch": 0.8301158301158301,
      "grad_norm": 3.232598066329956,
      "learning_rate": 8.722222222222224e-06,
      "loss": 0.3782,
      "step": 1075
    },
    {
      "epoch": 0.8494208494208494,
      "grad_norm": 3.384092092514038,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.406,
      "step": 1100
    },
    {
      "epoch": 0.8687258687258688,
      "grad_norm": 3.261749267578125,
      "learning_rate": 8.611111111111112e-06,
      "loss": 0.4397,
      "step": 1125
    },
    {
      "epoch": 0.888030888030888,
      "grad_norm": 3.817667007446289,
      "learning_rate": 8.555555555555556e-06,
      "loss": 0.3846,
      "step": 1150
    },
    {
      "epoch": 0.9073359073359073,
      "grad_norm": 3.3195998668670654,
      "learning_rate": 8.5e-06,
      "loss": 0.3748,
      "step": 1175
    },
    {
      "epoch": 0.9266409266409267,
      "grad_norm": 3.510660171508789,
      "learning_rate": 8.444444444444446e-06,
      "loss": 0.3977,
      "step": 1200
    },
    {
      "epoch": 0.9459459459459459,
      "grad_norm": 2.8854782581329346,
      "learning_rate": 8.38888888888889e-06,
      "loss": 0.3768,
      "step": 1225
    },
    {
      "epoch": 0.9652509652509652,
      "grad_norm": 3.2596817016601562,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.3791,
      "step": 1250
    },
    {
      "epoch": 0.9845559845559846,
      "grad_norm": 2.9499335289001465,
      "learning_rate": 8.277777777777778e-06,
      "loss": 0.3839,
      "step": 1275
    },
    {
      "epoch": 1.0038610038610039,
      "grad_norm": 2.457566738128662,
      "learning_rate": 8.222222222222222e-06,
      "loss": 0.3371,
      "step": 1300
    },
    {
      "epoch": 1.0231660231660231,
      "grad_norm": 2.8226237297058105,
      "learning_rate": 8.166666666666668e-06,
      "loss": 0.2705,
      "step": 1325
    },
    {
      "epoch": 1.0424710424710424,
      "grad_norm": 3.664156198501587,
      "learning_rate": 8.111111111111112e-06,
      "loss": 0.2729,
      "step": 1350
    },
    {
      "epoch": 1.0617760617760619,
      "grad_norm": 2.497749090194702,
      "learning_rate": 8.055555555555557e-06,
      "loss": 0.2613,
      "step": 1375
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 2.437830686569214,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.2718,
      "step": 1400
    },
    {
      "epoch": 1.1003861003861004,
      "grad_norm": 2.5171914100646973,
      "learning_rate": 7.944444444444445e-06,
      "loss": 0.2649,
      "step": 1425
    },
    {
      "epoch": 1.1196911196911197,
      "grad_norm": 3.023686647415161,
      "learning_rate": 7.88888888888889e-06,
      "loss": 0.2662,
      "step": 1450
    },
    {
      "epoch": 1.138996138996139,
      "grad_norm": 2.358494520187378,
      "learning_rate": 7.833333333333333e-06,
      "loss": 0.2511,
      "step": 1475
    },
    {
      "epoch": 1.1583011583011582,
      "grad_norm": 2.428818941116333,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.2626,
      "step": 1500
    },
    {
      "epoch": 1.1776061776061777,
      "grad_norm": 3.066359281539917,
      "learning_rate": 7.722222222222223e-06,
      "loss": 0.2504,
      "step": 1525
    },
    {
      "epoch": 1.196911196911197,
      "grad_norm": 2.8853089809417725,
      "learning_rate": 7.666666666666667e-06,
      "loss": 0.2565,
      "step": 1550
    },
    {
      "epoch": 1.2162162162162162,
      "grad_norm": 2.476994514465332,
      "learning_rate": 7.611111111111111e-06,
      "loss": 0.274,
      "step": 1575
    },
    {
      "epoch": 1.2355212355212355,
      "grad_norm": 3.0519564151763916,
      "learning_rate": 7.555555555555556e-06,
      "loss": 0.2604,
      "step": 1600
    },
    {
      "epoch": 1.2548262548262548,
      "grad_norm": 2.985853910446167,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.2832,
      "step": 1625
    },
    {
      "epoch": 1.2741312741312742,
      "grad_norm": 2.780881643295288,
      "learning_rate": 7.444444444444445e-06,
      "loss": 0.2743,
      "step": 1650
    },
    {
      "epoch": 1.2934362934362935,
      "grad_norm": 2.6131482124328613,
      "learning_rate": 7.38888888888889e-06,
      "loss": 0.2483,
      "step": 1675
    },
    {
      "epoch": 1.3127413127413128,
      "grad_norm": 3.0259549617767334,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.2686,
      "step": 1700
    },
    {
      "epoch": 1.332046332046332,
      "grad_norm": 2.420754909515381,
      "learning_rate": 7.277777777777778e-06,
      "loss": 0.265,
      "step": 1725
    },
    {
      "epoch": 1.3513513513513513,
      "grad_norm": 3.001450300216675,
      "learning_rate": 7.222222222222223e-06,
      "loss": 0.2772,
      "step": 1750
    },
    {
      "epoch": 1.3706563706563706,
      "grad_norm": 3.064401626586914,
      "learning_rate": 7.166666666666667e-06,
      "loss": 0.2844,
      "step": 1775
    },
    {
      "epoch": 1.3899613899613898,
      "grad_norm": 2.651357412338257,
      "learning_rate": 7.111111111111112e-06,
      "loss": 0.2681,
      "step": 1800
    },
    {
      "epoch": 1.4092664092664093,
      "grad_norm": 2.363473415374756,
      "learning_rate": 7.055555555555557e-06,
      "loss": 0.2567,
      "step": 1825
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 3.044689178466797,
      "learning_rate": 7e-06,
      "loss": 0.2566,
      "step": 1850
    },
    {
      "epoch": 1.4478764478764479,
      "grad_norm": 2.3913726806640625,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.2506,
      "step": 1875
    },
    {
      "epoch": 1.4671814671814671,
      "grad_norm": 2.2928853034973145,
      "learning_rate": 6.88888888888889e-06,
      "loss": 0.2727,
      "step": 1900
    },
    {
      "epoch": 1.4864864864864864,
      "grad_norm": 2.741959571838379,
      "learning_rate": 6.833333333333334e-06,
      "loss": 0.2467,
      "step": 1925
    },
    {
      "epoch": 1.505791505791506,
      "grad_norm": 2.0824573040008545,
      "learning_rate": 6.777777777777779e-06,
      "loss": 0.2635,
      "step": 1950
    },
    {
      "epoch": 1.525096525096525,
      "grad_norm": 2.4026057720184326,
      "learning_rate": 6.7222222222222235e-06,
      "loss": 0.2494,
      "step": 1975
    },
    {
      "epoch": 1.5444015444015444,
      "grad_norm": 3.137629270553589,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.2493,
      "step": 2000
    },
    {
      "epoch": 1.5444015444015444,
      "eval_loss": 0.40415582060813904,
      "eval_runtime": 2884.198,
      "eval_samples_per_second": 2.435,
      "eval_steps_per_second": 0.152,
      "eval_wer": 0.29395418895756503,
      "step": 2000
    },
    {
      "epoch": 1.5637065637065637,
      "grad_norm": 2.6668949127197266,
      "learning_rate": 6.6111111111111115e-06,
      "loss": 0.2522,
      "step": 2025
    },
    {
      "epoch": 1.583011583011583,
      "grad_norm": 2.5405499935150146,
      "learning_rate": 6.555555555555556e-06,
      "loss": 0.2747,
      "step": 2050
    },
    {
      "epoch": 1.6023166023166024,
      "grad_norm": 2.4073848724365234,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.2563,
      "step": 2075
    },
    {
      "epoch": 1.6216216216216215,
      "grad_norm": 3.3368024826049805,
      "learning_rate": 6.444444444444445e-06,
      "loss": 0.2477,
      "step": 2100
    },
    {
      "epoch": 1.640926640926641,
      "grad_norm": 2.741755723953247,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 0.2444,
      "step": 2125
    },
    {
      "epoch": 1.6602316602316602,
      "grad_norm": 2.5123753547668457,
      "learning_rate": 6.333333333333333e-06,
      "loss": 0.253,
      "step": 2150
    },
    {
      "epoch": 1.6795366795366795,
      "grad_norm": 2.8450229167938232,
      "learning_rate": 6.277777777777778e-06,
      "loss": 0.2695,
      "step": 2175
    },
    {
      "epoch": 1.698841698841699,
      "grad_norm": 2.2329864501953125,
      "learning_rate": 6.222222222222223e-06,
      "loss": 0.2531,
      "step": 2200
    },
    {
      "epoch": 1.718146718146718,
      "grad_norm": 2.8518948554992676,
      "learning_rate": 6.166666666666667e-06,
      "loss": 0.2671,
      "step": 2225
    },
    {
      "epoch": 1.7374517374517375,
      "grad_norm": 2.6565253734588623,
      "learning_rate": 6.111111111111112e-06,
      "loss": 0.2477,
      "step": 2250
    },
    {
      "epoch": 1.7567567567567568,
      "grad_norm": 2.2332699298858643,
      "learning_rate": 6.055555555555555e-06,
      "loss": 0.2461,
      "step": 2275
    },
    {
      "epoch": 1.776061776061776,
      "grad_norm": 2.5508103370666504,
      "learning_rate": 6e-06,
      "loss": 0.2574,
      "step": 2300
    },
    {
      "epoch": 1.7953667953667953,
      "grad_norm": 2.47121000289917,
      "learning_rate": 5.944444444444445e-06,
      "loss": 0.2519,
      "step": 2325
    },
    {
      "epoch": 1.8146718146718146,
      "grad_norm": 2.798379898071289,
      "learning_rate": 5.88888888888889e-06,
      "loss": 0.2774,
      "step": 2350
    },
    {
      "epoch": 1.833976833976834,
      "grad_norm": 2.6081783771514893,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.2271,
      "step": 2375
    },
    {
      "epoch": 1.8532818532818531,
      "grad_norm": 2.355163335800171,
      "learning_rate": 5.777777777777778e-06,
      "loss": 0.248,
      "step": 2400
    },
    {
      "epoch": 1.8725868725868726,
      "grad_norm": 2.8276679515838623,
      "learning_rate": 5.722222222222222e-06,
      "loss": 0.2575,
      "step": 2425
    },
    {
      "epoch": 1.8918918918918919,
      "grad_norm": 2.9946837425231934,
      "learning_rate": 5.666666666666667e-06,
      "loss": 0.234,
      "step": 2450
    },
    {
      "epoch": 1.9111969111969112,
      "grad_norm": 2.660792350769043,
      "learning_rate": 5.611111111111112e-06,
      "loss": 0.2373,
      "step": 2475
    },
    {
      "epoch": 1.9305019305019306,
      "grad_norm": 2.4244725704193115,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.2443,
      "step": 2500
    },
    {
      "epoch": 1.9498069498069497,
      "grad_norm": 2.6997570991516113,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.2553,
      "step": 2525
    },
    {
      "epoch": 1.9691119691119692,
      "grad_norm": 2.6614644527435303,
      "learning_rate": 5.444444444444445e-06,
      "loss": 0.2526,
      "step": 2550
    },
    {
      "epoch": 1.9884169884169884,
      "grad_norm": 2.8852739334106445,
      "learning_rate": 5.388888888888889e-06,
      "loss": 0.2363,
      "step": 2575
    },
    {
      "epoch": 2.0077220077220077,
      "grad_norm": 1.8691250085830688,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.2197,
      "step": 2600
    },
    {
      "epoch": 2.027027027027027,
      "grad_norm": 1.8070666790008545,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 0.1556,
      "step": 2625
    },
    {
      "epoch": 2.0463320463320462,
      "grad_norm": 2.2826385498046875,
      "learning_rate": 5.2222222222222226e-06,
      "loss": 0.1476,
      "step": 2650
    },
    {
      "epoch": 2.0656370656370657,
      "grad_norm": 2.1625537872314453,
      "learning_rate": 5.1666666666666675e-06,
      "loss": 0.1628,
      "step": 2675
    },
    {
      "epoch": 2.0849420849420848,
      "grad_norm": 2.158252000808716,
      "learning_rate": 5.1111111111111115e-06,
      "loss": 0.1353,
      "step": 2700
    },
    {
      "epoch": 2.1042471042471043,
      "grad_norm": 2.2190427780151367,
      "learning_rate": 5.0555555555555555e-06,
      "loss": 0.1697,
      "step": 2725
    },
    {
      "epoch": 2.1235521235521237,
      "grad_norm": 1.9213645458221436,
      "learning_rate": 5e-06,
      "loss": 0.1535,
      "step": 2750
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 2.329991340637207,
      "learning_rate": 4.944444444444445e-06,
      "loss": 0.1582,
      "step": 2775
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 2.3484299182891846,
      "learning_rate": 4.888888888888889e-06,
      "loss": 0.1464,
      "step": 2800
    },
    {
      "epoch": 2.1814671814671813,
      "grad_norm": 2.418144941329956,
      "learning_rate": 4.833333333333333e-06,
      "loss": 0.1557,
      "step": 2825
    },
    {
      "epoch": 2.200772200772201,
      "grad_norm": 2.182532787322998,
      "learning_rate": 4.777777777777778e-06,
      "loss": 0.1463,
      "step": 2850
    },
    {
      "epoch": 2.2200772200772203,
      "grad_norm": 1.8456135988235474,
      "learning_rate": 4.722222222222222e-06,
      "loss": 0.1497,
      "step": 2875
    },
    {
      "epoch": 2.2393822393822393,
      "grad_norm": 2.3882248401641846,
      "learning_rate": 4.666666666666667e-06,
      "loss": 0.1487,
      "step": 2900
    },
    {
      "epoch": 2.258687258687259,
      "grad_norm": 2.5126445293426514,
      "learning_rate": 4.611111111111112e-06,
      "loss": 0.1577,
      "step": 2925
    },
    {
      "epoch": 2.277992277992278,
      "grad_norm": 2.2082295417785645,
      "learning_rate": 4.555555555555556e-06,
      "loss": 0.1491,
      "step": 2950
    },
    {
      "epoch": 2.2972972972972974,
      "grad_norm": 2.6382830142974854,
      "learning_rate": 4.5e-06,
      "loss": 0.1428,
      "step": 2975
    },
    {
      "epoch": 2.3166023166023164,
      "grad_norm": 2.447270631790161,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.1548,
      "step": 3000
    },
    {
      "epoch": 2.3166023166023164,
      "eval_loss": 0.39975878596305847,
      "eval_runtime": 2877.301,
      "eval_samples_per_second": 2.44,
      "eval_steps_per_second": 0.153,
      "eval_wer": 0.28095621461590403,
      "step": 3000
    },
    {
      "epoch": 2.335907335907336,
      "grad_norm": 1.664686679840088,
      "learning_rate": 4.388888888888889e-06,
      "loss": 0.1407,
      "step": 3025
    },
    {
      "epoch": 2.3552123552123554,
      "grad_norm": 2.261843681335449,
      "learning_rate": 4.333333333333334e-06,
      "loss": 0.1511,
      "step": 3050
    },
    {
      "epoch": 2.3745173745173744,
      "grad_norm": 2.157227039337158,
      "learning_rate": 4.277777777777778e-06,
      "loss": 0.1433,
      "step": 3075
    },
    {
      "epoch": 2.393822393822394,
      "grad_norm": 2.434943199157715,
      "learning_rate": 4.222222222222223e-06,
      "loss": 0.1476,
      "step": 3100
    },
    {
      "epoch": 2.413127413127413,
      "grad_norm": 2.106074333190918,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.1496,
      "step": 3125
    },
    {
      "epoch": 2.4324324324324325,
      "grad_norm": 2.263333320617676,
      "learning_rate": 4.111111111111111e-06,
      "loss": 0.1409,
      "step": 3150
    },
    {
      "epoch": 2.4517374517374515,
      "grad_norm": 2.9890952110290527,
      "learning_rate": 4.055555555555556e-06,
      "loss": 0.1497,
      "step": 3175
    },
    {
      "epoch": 2.471042471042471,
      "grad_norm": 2.174651622772217,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.1418,
      "step": 3200
    },
    {
      "epoch": 2.4903474903474905,
      "grad_norm": 2.380537509918213,
      "learning_rate": 3.944444444444445e-06,
      "loss": 0.1369,
      "step": 3225
    },
    {
      "epoch": 2.5096525096525095,
      "grad_norm": 2.1118268966674805,
      "learning_rate": 3.88888888888889e-06,
      "loss": 0.1465,
      "step": 3250
    },
    {
      "epoch": 2.528957528957529,
      "grad_norm": 2.221266746520996,
      "learning_rate": 3.833333333333334e-06,
      "loss": 0.1392,
      "step": 3275
    },
    {
      "epoch": 2.5482625482625485,
      "grad_norm": 2.216095209121704,
      "learning_rate": 3.777777777777778e-06,
      "loss": 0.143,
      "step": 3300
    },
    {
      "epoch": 2.5675675675675675,
      "grad_norm": 2.23201322555542,
      "learning_rate": 3.7222222222222225e-06,
      "loss": 0.1399,
      "step": 3325
    },
    {
      "epoch": 2.586872586872587,
      "grad_norm": 2.174283742904663,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 0.1531,
      "step": 3350
    },
    {
      "epoch": 2.606177606177606,
      "grad_norm": 2.455362558364868,
      "learning_rate": 3.6111111111111115e-06,
      "loss": 0.1466,
      "step": 3375
    },
    {
      "epoch": 2.6254826254826256,
      "grad_norm": 2.6259799003601074,
      "learning_rate": 3.555555555555556e-06,
      "loss": 0.1458,
      "step": 3400
    },
    {
      "epoch": 2.6447876447876446,
      "grad_norm": 2.1615617275238037,
      "learning_rate": 3.5e-06,
      "loss": 0.145,
      "step": 3425
    },
    {
      "epoch": 2.664092664092664,
      "grad_norm": 2.36684513092041,
      "learning_rate": 3.444444444444445e-06,
      "loss": 0.1462,
      "step": 3450
    },
    {
      "epoch": 2.6833976833976836,
      "grad_norm": 2.027125835418701,
      "learning_rate": 3.3888888888888893e-06,
      "loss": 0.1445,
      "step": 3475
    },
    {
      "epoch": 2.7027027027027026,
      "grad_norm": 2.1827645301818848,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.1466,
      "step": 3500
    },
    {
      "epoch": 2.722007722007722,
      "grad_norm": 2.1014013290405273,
      "learning_rate": 3.277777777777778e-06,
      "loss": 0.1453,
      "step": 3525
    },
    {
      "epoch": 2.741312741312741,
      "grad_norm": 1.92685866355896,
      "learning_rate": 3.2222222222222227e-06,
      "loss": 0.1341,
      "step": 3550
    },
    {
      "epoch": 2.7606177606177607,
      "grad_norm": 2.028932571411133,
      "learning_rate": 3.1666666666666667e-06,
      "loss": 0.1399,
      "step": 3575
    },
    {
      "epoch": 2.7799227799227797,
      "grad_norm": 1.9122258424758911,
      "learning_rate": 3.1111111111111116e-06,
      "loss": 0.1433,
      "step": 3600
    },
    {
      "epoch": 2.799227799227799,
      "grad_norm": 2.2335093021392822,
      "learning_rate": 3.055555555555556e-06,
      "loss": 0.1438,
      "step": 3625
    },
    {
      "epoch": 2.8185328185328187,
      "grad_norm": 2.2860329151153564,
      "learning_rate": 3e-06,
      "loss": 0.1392,
      "step": 3650
    },
    {
      "epoch": 2.8378378378378377,
      "grad_norm": 1.873155951499939,
      "learning_rate": 2.944444444444445e-06,
      "loss": 0.1396,
      "step": 3675
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 2.6895735263824463,
      "learning_rate": 2.888888888888889e-06,
      "loss": 0.1419,
      "step": 3700
    },
    {
      "epoch": 2.8764478764478767,
      "grad_norm": 1.6812546253204346,
      "learning_rate": 2.8333333333333335e-06,
      "loss": 0.138,
      "step": 3725
    },
    {
      "epoch": 2.8957528957528957,
      "grad_norm": 2.893087387084961,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 0.1479,
      "step": 3750
    },
    {
      "epoch": 2.915057915057915,
      "grad_norm": 2.508882999420166,
      "learning_rate": 2.7222222222222224e-06,
      "loss": 0.1427,
      "step": 3775
    },
    {
      "epoch": 2.9343629343629343,
      "grad_norm": 2.536713123321533,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.1476,
      "step": 3800
    },
    {
      "epoch": 2.9536679536679538,
      "grad_norm": 2.256779432296753,
      "learning_rate": 2.6111111111111113e-06,
      "loss": 0.1357,
      "step": 3825
    },
    {
      "epoch": 2.972972972972973,
      "grad_norm": 2.0813608169555664,
      "learning_rate": 2.5555555555555557e-06,
      "loss": 0.1362,
      "step": 3850
    },
    {
      "epoch": 2.9922779922779923,
      "grad_norm": 2.6842668056488037,
      "learning_rate": 2.5e-06,
      "loss": 0.1527,
      "step": 3875
    },
    {
      "epoch": 3.011583011583012,
      "grad_norm": 1.5669656991958618,
      "learning_rate": 2.4444444444444447e-06,
      "loss": 0.1045,
      "step": 3900
    },
    {
      "epoch": 3.030888030888031,
      "grad_norm": 1.9614019393920898,
      "learning_rate": 2.388888888888889e-06,
      "loss": 0.0861,
      "step": 3925
    },
    {
      "epoch": 3.0501930501930503,
      "grad_norm": 1.6240154504776,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 0.0811,
      "step": 3950
    },
    {
      "epoch": 3.0694980694980694,
      "grad_norm": 1.675820231437683,
      "learning_rate": 2.277777777777778e-06,
      "loss": 0.0808,
      "step": 3975
    },
    {
      "epoch": 3.088803088803089,
      "grad_norm": 1.5205894708633423,
      "learning_rate": 2.222222222222222e-06,
      "loss": 0.0755,
      "step": 4000
    },
    {
      "epoch": 3.088803088803089,
      "eval_loss": 0.42321887612342834,
      "eval_runtime": 2878.9611,
      "eval_samples_per_second": 2.439,
      "eval_steps_per_second": 0.152,
      "eval_wer": 0.27565833896016206,
      "step": 4000
    },
    {
      "epoch": 3.108108108108108,
      "grad_norm": 1.8952137231826782,
      "learning_rate": 2.166666666666667e-06,
      "loss": 0.0774,
      "step": 4025
    },
    {
      "epoch": 3.1274131274131274,
      "grad_norm": 1.7920564413070679,
      "learning_rate": 2.1111111111111114e-06,
      "loss": 0.0796,
      "step": 4050
    },
    {
      "epoch": 3.146718146718147,
      "grad_norm": 1.4591903686523438,
      "learning_rate": 2.0555555555555555e-06,
      "loss": 0.0784,
      "step": 4075
    },
    {
      "epoch": 3.166023166023166,
      "grad_norm": 1.7589879035949707,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.08,
      "step": 4100
    },
    {
      "epoch": 3.1853281853281854,
      "grad_norm": 2.2422068119049072,
      "learning_rate": 1.944444444444445e-06,
      "loss": 0.0784,
      "step": 4125
    },
    {
      "epoch": 3.2046332046332044,
      "grad_norm": 2.1973836421966553,
      "learning_rate": 1.888888888888889e-06,
      "loss": 0.0747,
      "step": 4150
    },
    {
      "epoch": 3.223938223938224,
      "grad_norm": 1.9121280908584595,
      "learning_rate": 1.8333333333333333e-06,
      "loss": 0.08,
      "step": 4175
    },
    {
      "epoch": 3.2432432432432434,
      "grad_norm": 1.950451135635376,
      "learning_rate": 1.777777777777778e-06,
      "loss": 0.0787,
      "step": 4200
    },
    {
      "epoch": 3.2625482625482625,
      "grad_norm": 1.5729962587356567,
      "learning_rate": 1.7222222222222224e-06,
      "loss": 0.0708,
      "step": 4225
    },
    {
      "epoch": 3.281853281853282,
      "grad_norm": 1.7916944026947021,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.074,
      "step": 4250
    },
    {
      "epoch": 3.301158301158301,
      "grad_norm": 2.091259717941284,
      "learning_rate": 1.6111111111111113e-06,
      "loss": 0.0734,
      "step": 4275
    },
    {
      "epoch": 3.3204633204633205,
      "grad_norm": 2.2973198890686035,
      "learning_rate": 1.5555555555555558e-06,
      "loss": 0.0763,
      "step": 4300
    },
    {
      "epoch": 3.33976833976834,
      "grad_norm": 2.0894815921783447,
      "learning_rate": 1.5e-06,
      "loss": 0.0748,
      "step": 4325
    },
    {
      "epoch": 3.359073359073359,
      "grad_norm": 1.7151942253112793,
      "learning_rate": 1.4444444444444445e-06,
      "loss": 0.0719,
      "step": 4350
    },
    {
      "epoch": 3.3783783783783785,
      "grad_norm": 2.2781429290771484,
      "learning_rate": 1.3888888888888892e-06,
      "loss": 0.0795,
      "step": 4375
    },
    {
      "epoch": 3.3976833976833976,
      "grad_norm": 1.7880635261535645,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.0765,
      "step": 4400
    },
    {
      "epoch": 3.416988416988417,
      "grad_norm": 1.8607178926467896,
      "learning_rate": 1.2777777777777779e-06,
      "loss": 0.0806,
      "step": 4425
    },
    {
      "epoch": 3.436293436293436,
      "grad_norm": 1.5629892349243164,
      "learning_rate": 1.2222222222222223e-06,
      "loss": 0.0731,
      "step": 4450
    },
    {
      "epoch": 3.4555984555984556,
      "grad_norm": 1.976099967956543,
      "learning_rate": 1.1666666666666668e-06,
      "loss": 0.0727,
      "step": 4475
    },
    {
      "epoch": 3.474903474903475,
      "grad_norm": 1.7316856384277344,
      "learning_rate": 1.111111111111111e-06,
      "loss": 0.078,
      "step": 4500
    },
    {
      "epoch": 3.494208494208494,
      "grad_norm": 1.796291470527649,
      "learning_rate": 1.0555555555555557e-06,
      "loss": 0.0653,
      "step": 4525
    },
    {
      "epoch": 3.5135135135135136,
      "grad_norm": 1.6075197458267212,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.0788,
      "step": 4550
    },
    {
      "epoch": 3.532818532818533,
      "grad_norm": 1.6467320919036865,
      "learning_rate": 9.444444444444445e-07,
      "loss": 0.0695,
      "step": 4575
    },
    {
      "epoch": 3.552123552123552,
      "grad_norm": 1.1778756380081177,
      "learning_rate": 8.88888888888889e-07,
      "loss": 0.0683,
      "step": 4600
    },
    {
      "epoch": 3.571428571428571,
      "grad_norm": 1.402613878250122,
      "learning_rate": 8.333333333333333e-07,
      "loss": 0.0698,
      "step": 4625
    },
    {
      "epoch": 3.5907335907335907,
      "grad_norm": 2.3424105644226074,
      "learning_rate": 7.777777777777779e-07,
      "loss": 0.0788,
      "step": 4650
    },
    {
      "epoch": 3.61003861003861,
      "grad_norm": 1.551446795463562,
      "learning_rate": 7.222222222222222e-07,
      "loss": 0.0765,
      "step": 4675
    },
    {
      "epoch": 3.629343629343629,
      "grad_norm": 2.174612045288086,
      "learning_rate": 6.666666666666667e-07,
      "loss": 0.0759,
      "step": 4700
    },
    {
      "epoch": 3.6486486486486487,
      "grad_norm": 1.595508337020874,
      "learning_rate": 6.111111111111112e-07,
      "loss": 0.0783,
      "step": 4725
    },
    {
      "epoch": 3.667953667953668,
      "grad_norm": 1.6800012588500977,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.0768,
      "step": 4750
    },
    {
      "epoch": 3.687258687258687,
      "grad_norm": 2.050546884536743,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.0736,
      "step": 4775
    },
    {
      "epoch": 3.7065637065637067,
      "grad_norm": 1.9666138887405396,
      "learning_rate": 4.444444444444445e-07,
      "loss": 0.0735,
      "step": 4800
    },
    {
      "epoch": 3.7258687258687258,
      "grad_norm": 2.0180180072784424,
      "learning_rate": 3.8888888888888895e-07,
      "loss": 0.071,
      "step": 4825
    },
    {
      "epoch": 3.7451737451737452,
      "grad_norm": 2.1212894916534424,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 0.0907,
      "step": 4850
    },
    {
      "epoch": 3.7644787644787643,
      "grad_norm": 1.6568444967269897,
      "learning_rate": 2.7777777777777776e-07,
      "loss": 0.0746,
      "step": 4875
    },
    {
      "epoch": 3.7837837837837838,
      "grad_norm": 2.122925281524658,
      "learning_rate": 2.2222222222222224e-07,
      "loss": 0.0734,
      "step": 4900
    },
    {
      "epoch": 3.8030888030888033,
      "grad_norm": 1.5614521503448486,
      "learning_rate": 1.6666666666666668e-07,
      "loss": 0.0737,
      "step": 4925
    },
    {
      "epoch": 3.8223938223938223,
      "grad_norm": 3.5003342628479004,
      "learning_rate": 1.1111111111111112e-07,
      "loss": 0.0743,
      "step": 4950
    },
    {
      "epoch": 3.841698841698842,
      "grad_norm": 1.8813285827636719,
      "learning_rate": 5.555555555555556e-08,
      "loss": 0.0712,
      "step": 4975
    },
    {
      "epoch": 3.861003861003861,
      "grad_norm": 1.836345911026001,
      "learning_rate": 0.0,
      "loss": 0.0767,
      "step": 5000
    },
    {
      "epoch": 3.861003861003861,
      "eval_loss": 0.4226454198360443,
      "eval_runtime": 2912.2253,
      "eval_samples_per_second": 2.411,
      "eval_steps_per_second": 0.151,
      "eval_wer": 0.2671921259024568,
      "step": 5000
    },
    {
      "epoch": 3.861003861003861,
      "step": 5000,
      "total_flos": 5.435487665750016e+20,
      "train_loss": 0.2543565913915634,
      "train_runtime": 60445.9813,
      "train_samples_per_second": 2.647,
      "train_steps_per_second": 0.083
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.435487665750016e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}