{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 28.79581151832461,
  "global_step": 22000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.65,
      "learning_rate": 4.94e-05,
      "loss": 6.5844,
      "step": 500
    },
    {
      "epoch": 0.65,
      "eval_loss": 2.985063076019287,
      "eval_runtime": 66.7207,
      "eval_samples_per_second": 15.348,
      "eval_steps_per_second": 1.918,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 1.31,
      "learning_rate": 9.94e-05,
      "loss": 2.1717,
      "step": 1000
    },
    {
      "epoch": 1.31,
      "eval_loss": 1.0769672393798828,
      "eval_runtime": 66.0484,
      "eval_samples_per_second": 15.504,
      "eval_steps_per_second": 1.938,
      "eval_wer": 0.8259404828747895,
      "step": 1000
    },
    {
      "epoch": 1.96,
      "learning_rate": 9.774635036496352e-05,
      "loss": 1.0058,
      "step": 1500
    },
    {
      "epoch": 1.96,
      "eval_loss": 0.7441346049308777,
      "eval_runtime": 66.5848,
      "eval_samples_per_second": 15.379,
      "eval_steps_per_second": 1.922,
      "eval_wer": 0.6882650196518809,
      "step": 1500
    },
    {
      "epoch": 2.62,
      "learning_rate": 9.546532846715329e-05,
      "loss": 0.7316,
      "step": 2000
    },
    {
      "epoch": 2.62,
      "eval_loss": 0.6406537294387817,
      "eval_runtime": 65.9141,
      "eval_samples_per_second": 15.535,
      "eval_steps_per_second": 1.942,
      "eval_wer": 0.617630544637844,
      "step": 2000
    },
    {
      "epoch": 3.27,
      "learning_rate": 9.318430656934307e-05,
      "loss": 0.6249,
      "step": 2500
    },
    {
      "epoch": 3.27,
      "eval_loss": 0.5814340114593506,
      "eval_runtime": 66.4954,
      "eval_samples_per_second": 15.4,
      "eval_steps_per_second": 1.925,
      "eval_wer": 0.5783267827063447,
      "step": 2500
    },
    {
      "epoch": 3.93,
      "learning_rate": 9.090328467153285e-05,
      "loss": 0.5541,
      "step": 3000
    },
    {
      "epoch": 3.93,
      "eval_loss": 0.5387497544288635,
      "eval_runtime": 66.9564,
      "eval_samples_per_second": 15.294,
      "eval_steps_per_second": 1.912,
      "eval_wer": 0.5449747332959012,
      "step": 3000
    },
    {
      "epoch": 4.58,
      "learning_rate": 8.862226277372263e-05,
      "loss": 0.4727,
      "step": 3500
    },
    {
      "epoch": 4.58,
      "eval_loss": 0.5657380223274231,
      "eval_runtime": 66.1646,
      "eval_samples_per_second": 15.477,
      "eval_steps_per_second": 1.935,
      "eval_wer": 0.5357664233576642,
      "step": 3500
    },
    {
      "epoch": 5.24,
      "learning_rate": 8.634124087591242e-05,
      "loss": 0.4444,
      "step": 4000
    },
    {
      "epoch": 5.24,
      "eval_loss": 0.5406317710876465,
      "eval_runtime": 65.9956,
      "eval_samples_per_second": 15.516,
      "eval_steps_per_second": 1.94,
      "eval_wer": 0.5164514317798989,
      "step": 4000
    },
    {
      "epoch": 5.89,
      "learning_rate": 8.406021897810219e-05,
      "loss": 0.4057,
      "step": 4500
    },
    {
      "epoch": 5.89,
      "eval_loss": 0.5271017551422119,
      "eval_runtime": 66.6343,
      "eval_samples_per_second": 15.367,
      "eval_steps_per_second": 1.921,
      "eval_wer": 0.5116226838854576,
      "step": 4500
    },
    {
      "epoch": 6.54,
      "learning_rate": 8.177919708029196e-05,
      "loss": 0.3663,
      "step": 5000
    },
    {
      "epoch": 6.54,
      "eval_loss": 0.5105618238449097,
      "eval_runtime": 66.576,
      "eval_samples_per_second": 15.381,
      "eval_steps_per_second": 1.923,
      "eval_wer": 0.4793935991016283,
      "step": 5000
    },
    {
      "epoch": 7.2,
      "learning_rate": 7.949817518248176e-05,
      "loss": 0.3406,
      "step": 5500
    },
    {
      "epoch": 7.2,
      "eval_loss": 0.5662369132041931,
      "eval_runtime": 67.3476,
      "eval_samples_per_second": 15.205,
      "eval_steps_per_second": 1.901,
      "eval_wer": 0.48568220101066817,
      "step": 5500
    },
    {
      "epoch": 7.85,
      "learning_rate": 7.721715328467154e-05,
      "loss": 0.3142,
      "step": 6000
    },
    {
      "epoch": 7.85,
      "eval_loss": 0.5632680654525757,
      "eval_runtime": 66.2027,
      "eval_samples_per_second": 15.468,
      "eval_steps_per_second": 1.933,
      "eval_wer": 0.4879281302638967,
      "step": 6000
    },
    {
      "epoch": 8.51,
      "learning_rate": 7.493613138686131e-05,
      "loss": 0.3002,
      "step": 6500
    },
    {
      "epoch": 8.51,
      "eval_loss": 0.5218114852905273,
      "eval_runtime": 67.5136,
      "eval_samples_per_second": 15.167,
      "eval_steps_per_second": 1.896,
      "eval_wer": 0.45659741718135877,
      "step": 6500
    },
    {
      "epoch": 9.16,
      "learning_rate": 7.265967153284671e-05,
      "loss": 0.2845,
      "step": 7000
    },
    {
      "epoch": 9.16,
      "eval_loss": 0.5436348915100098,
      "eval_runtime": 66.2104,
      "eval_samples_per_second": 15.466,
      "eval_steps_per_second": 1.933,
      "eval_wer": 0.45457608085345313,
      "step": 7000
    },
    {
      "epoch": 9.82,
      "learning_rate": 7.038321167883212e-05,
      "loss": 0.2603,
      "step": 7500
    },
    {
      "epoch": 9.82,
      "eval_loss": 0.5183758735656738,
      "eval_runtime": 67.776,
      "eval_samples_per_second": 15.109,
      "eval_steps_per_second": 1.889,
      "eval_wer": 0.4415496911847277,
      "step": 7500
    },
    {
      "epoch": 10.47,
      "learning_rate": 6.810218978102189e-05,
      "loss": 0.2438,
      "step": 8000
    },
    {
      "epoch": 10.47,
      "eval_loss": 0.5246397256851196,
      "eval_runtime": 66.5435,
      "eval_samples_per_second": 15.388,
      "eval_steps_per_second": 1.924,
      "eval_wer": 0.43638405390230206,
      "step": 8000
    },
    {
      "epoch": 11.13,
      "learning_rate": 6.582116788321169e-05,
      "loss": 0.2396,
      "step": 8500
    },
    {
      "epoch": 11.13,
      "eval_loss": 0.5663778781890869,
      "eval_runtime": 66.6323,
      "eval_samples_per_second": 15.368,
      "eval_steps_per_second": 1.921,
      "eval_wer": 0.4428972487366648,
      "step": 8500
    },
    {
      "epoch": 11.78,
      "learning_rate": 6.354014598540147e-05,
      "loss": 0.2229,
      "step": 9000
    },
    {
      "epoch": 11.78,
      "eval_loss": 0.5550942420959473,
      "eval_runtime": 65.2937,
      "eval_samples_per_second": 15.683,
      "eval_steps_per_second": 1.96,
      "eval_wer": 0.4415496911847277,
      "step": 9000
    },
    {
      "epoch": 12.43,
      "learning_rate": 6.125912408759124e-05,
      "loss": 0.2088,
      "step": 9500
    },
    {
      "epoch": 12.43,
      "eval_loss": 0.5503721833229065,
      "eval_runtime": 66.2676,
      "eval_samples_per_second": 15.453,
      "eval_steps_per_second": 1.932,
      "eval_wer": 0.4257158899494666,
      "step": 9500
    },
    {
      "epoch": 13.09,
      "learning_rate": 5.897810218978103e-05,
      "loss": 0.2042,
      "step": 10000
    },
    {
      "epoch": 13.09,
      "eval_loss": 0.5646582245826721,
      "eval_runtime": 66.4465,
      "eval_samples_per_second": 15.411,
      "eval_steps_per_second": 1.926,
      "eval_wer": 0.4398652442448063,
      "step": 10000
    },
    {
      "epoch": 13.74,
      "learning_rate": 5.669708029197081e-05,
      "loss": 0.1975,
      "step": 10500
    },
    {
      "epoch": 13.74,
      "eval_loss": 0.5424100160598755,
      "eval_runtime": 66.9743,
      "eval_samples_per_second": 15.289,
      "eval_steps_per_second": 1.911,
      "eval_wer": 0.4186412128017967,
      "step": 10500
    },
    {
      "epoch": 14.4,
      "learning_rate": 5.441605839416058e-05,
      "loss": 0.1856,
      "step": 11000
    },
    {
      "epoch": 14.4,
      "eval_loss": 0.5583313703536987,
      "eval_runtime": 66.5902,
      "eval_samples_per_second": 15.378,
      "eval_steps_per_second": 1.922,
      "eval_wer": 0.42515440763615947,
      "step": 11000
    },
    {
      "epoch": 15.05,
      "learning_rate": 5.213503649635036e-05,
      "loss": 0.1872,
      "step": 11500
    },
    {
      "epoch": 15.05,
      "eval_loss": 0.6118334531784058,
      "eval_runtime": 66.3517,
      "eval_samples_per_second": 15.433,
      "eval_steps_per_second": 1.929,
      "eval_wer": 0.433464345873105,
      "step": 11500
    },
    {
      "epoch": 15.71,
      "learning_rate": 4.985401459854015e-05,
      "loss": 0.1676,
      "step": 12000
    },
    {
      "epoch": 15.71,
      "eval_loss": 0.6066869497299194,
      "eval_runtime": 67.2768,
      "eval_samples_per_second": 15.221,
      "eval_steps_per_second": 1.903,
      "eval_wer": 0.43627175743964064,
      "step": 12000
    },
    {
      "epoch": 16.36,
      "learning_rate": 4.757299270072993e-05,
      "loss": 0.1536,
      "step": 12500
    },
    {
      "epoch": 16.36,
      "eval_loss": 0.5462669730186462,
      "eval_runtime": 66.3968,
      "eval_samples_per_second": 15.422,
      "eval_steps_per_second": 1.928,
      "eval_wer": 0.412914093206064,
      "step": 12500
    },
    {
      "epoch": 17.02,
      "learning_rate": 4.5291970802919706e-05,
      "loss": 0.1582,
      "step": 13000
    },
    {
      "epoch": 17.02,
      "eval_loss": 0.5799742937088013,
      "eval_runtime": 65.9177,
      "eval_samples_per_second": 15.535,
      "eval_steps_per_second": 1.942,
      "eval_wer": 0.41021897810218977,
      "step": 13000
    },
    {
      "epoch": 17.67,
      "learning_rate": 4.3015510948905114e-05,
      "loss": 0.1429,
      "step": 13500
    },
    {
      "epoch": 17.67,
      "eval_loss": 0.5899107456207275,
      "eval_runtime": 66.1113,
      "eval_samples_per_second": 15.489,
      "eval_steps_per_second": 1.936,
      "eval_wer": 0.4150477259966311,
      "step": 13500
    },
    {
      "epoch": 18.32,
      "learning_rate": 4.0734489051094895e-05,
      "loss": 0.1451,
      "step": 14000
    },
    {
      "epoch": 18.32,
      "eval_loss": 0.6129721999168396,
      "eval_runtime": 67.4516,
      "eval_samples_per_second": 15.181,
      "eval_steps_per_second": 1.898,
      "eval_wer": 0.4138124649073554,
      "step": 14000
    },
    {
      "epoch": 18.98,
      "learning_rate": 3.845346715328467e-05,
      "loss": 0.1356,
      "step": 14500
    },
    {
      "epoch": 18.98,
      "eval_loss": 0.6040089130401611,
      "eval_runtime": 66.75,
      "eval_samples_per_second": 15.341,
      "eval_steps_per_second": 1.918,
      "eval_wer": 0.4123526108927569,
      "step": 14500
    },
    {
      "epoch": 19.63,
      "learning_rate": 3.617244525547445e-05,
      "loss": 0.134,
      "step": 15000
    },
    {
      "epoch": 19.63,
      "eval_loss": 0.5997006893157959,
      "eval_runtime": 67.809,
      "eval_samples_per_second": 15.101,
      "eval_steps_per_second": 1.888,
      "eval_wer": 0.40819764177428414,
      "step": 15000
    },
    {
      "epoch": 20.29,
      "learning_rate": 3.389598540145986e-05,
      "loss": 0.1305,
      "step": 15500
    },
    {
      "epoch": 20.29,
      "eval_loss": 0.5769977569580078,
      "eval_runtime": 66.9982,
      "eval_samples_per_second": 15.284,
      "eval_steps_per_second": 1.91,
      "eval_wer": 0.40235822571588997,
      "step": 15500
    },
    {
      "epoch": 20.94,
      "learning_rate": 3.161496350364963e-05,
      "loss": 0.1222,
      "step": 16000
    },
    {
      "epoch": 20.94,
      "eval_loss": 0.5980133414268494,
      "eval_runtime": 66.1854,
      "eval_samples_per_second": 15.472,
      "eval_steps_per_second": 1.934,
      "eval_wer": 0.391577765300393,
      "step": 16000
    },
    {
      "epoch": 21.6,
      "learning_rate": 2.9333941605839417e-05,
      "loss": 0.1214,
      "step": 16500
    },
    {
      "epoch": 21.6,
      "eval_loss": 0.5917083024978638,
      "eval_runtime": 67.5557,
      "eval_samples_per_second": 15.158,
      "eval_steps_per_second": 1.895,
      "eval_wer": 0.4005614823133071,
      "step": 16500
    },
    {
      "epoch": 22.25,
      "learning_rate": 2.70529197080292e-05,
      "loss": 0.1184,
      "step": 17000
    },
    {
      "epoch": 22.25,
      "eval_loss": 0.578889787197113,
      "eval_runtime": 66.5407,
      "eval_samples_per_second": 15.389,
      "eval_steps_per_second": 1.924,
      "eval_wer": 0.39460976979225154,
      "step": 17000
    },
    {
      "epoch": 22.91,
      "learning_rate": 2.477189781021898e-05,
      "loss": 0.1132,
      "step": 17500
    },
    {
      "epoch": 22.91,
      "eval_loss": 0.5992549657821655,
      "eval_runtime": 66.4759,
      "eval_samples_per_second": 15.404,
      "eval_steps_per_second": 1.926,
      "eval_wer": 0.3892195395845031,
      "step": 17500
    },
    {
      "epoch": 23.56,
      "learning_rate": 2.249087591240876e-05,
      "loss": 0.1052,
      "step": 18000
    },
    {
      "epoch": 23.56,
      "eval_loss": 0.5946719646453857,
      "eval_runtime": 66.8036,
      "eval_samples_per_second": 15.329,
      "eval_steps_per_second": 1.916,
      "eval_wer": 0.3889949466591802,
      "step": 18000
    },
    {
      "epoch": 24.21,
      "learning_rate": 2.0209854014598544e-05,
      "loss": 0.113,
      "step": 18500
    },
    {
      "epoch": 24.21,
      "eval_loss": 0.6009370684623718,
      "eval_runtime": 65.8523,
      "eval_samples_per_second": 15.55,
      "eval_steps_per_second": 1.944,
      "eval_wer": 0.3841661987647389,
      "step": 18500
    },
    {
      "epoch": 24.87,
      "learning_rate": 1.792883211678832e-05,
      "loss": 0.1021,
      "step": 19000
    },
    {
      "epoch": 24.87,
      "eval_loss": 0.6255775690078735,
      "eval_runtime": 67.7689,
      "eval_samples_per_second": 15.11,
      "eval_steps_per_second": 1.889,
      "eval_wer": 0.3841661987647389,
      "step": 19000
    },
    {
      "epoch": 25.52,
      "learning_rate": 1.5652372262773723e-05,
      "loss": 0.0966,
      "step": 19500
    },
    {
      "epoch": 25.52,
      "eval_loss": 0.6213188171386719,
      "eval_runtime": 68.5057,
      "eval_samples_per_second": 14.948,
      "eval_steps_per_second": 1.868,
      "eval_wer": 0.3869736103312746,
      "step": 19500
    },
    {
      "epoch": 26.18,
      "learning_rate": 1.3371350364963504e-05,
      "loss": 0.0981,
      "step": 20000
    },
    {
      "epoch": 26.18,
      "eval_loss": 0.6331676244735718,
      "eval_runtime": 67.2276,
      "eval_samples_per_second": 15.232,
      "eval_steps_per_second": 1.904,
      "eval_wer": 0.37866367209432905,
      "step": 20000
    },
    {
      "epoch": 26.83,
      "learning_rate": 1.1090328467153285e-05,
      "loss": 0.0983,
      "step": 20500
    },
    {
      "epoch": 26.83,
      "eval_loss": 0.6225576996803284,
      "eval_runtime": 66.5907,
      "eval_samples_per_second": 15.378,
      "eval_steps_per_second": 1.922,
      "eval_wer": 0.3806850084222347,
      "step": 20500
    },
    {
      "epoch": 27.49,
      "learning_rate": 8.809306569343067e-06,
      "loss": 0.0915,
      "step": 21000
    },
    {
      "epoch": 27.49,
      "eval_loss": 0.6176589727401733,
      "eval_runtime": 66.1318,
      "eval_samples_per_second": 15.484,
      "eval_steps_per_second": 1.936,
      "eval_wer": 0.3785513756316676,
      "step": 21000
    },
    {
      "epoch": 28.14,
      "learning_rate": 6.532846715328468e-06,
      "loss": 0.0898,
      "step": 21500
    },
    {
      "epoch": 28.14,
      "eval_loss": 0.6252412796020508,
      "eval_runtime": 66.2396,
      "eval_samples_per_second": 15.459,
      "eval_steps_per_second": 1.932,
      "eval_wer": 0.3828186412128018,
      "step": 21500
    },
    {
      "epoch": 28.8,
      "learning_rate": 4.251824817518248e-06,
      "loss": 0.0916,
      "step": 22000
    },
    {
      "epoch": 28.8,
      "eval_loss": 0.6163517832756042,
      "eval_runtime": 66.1166,
      "eval_samples_per_second": 15.488,
      "eval_steps_per_second": 1.936,
      "eval_wer": 0.3760808534531162,
      "step": 22000
    }
  ],
  "max_steps": 22920,
  "num_train_epochs": 30,
  "total_flos": 6.988254857647251e+18,
  "trial_name": null,
  "trial_params": null
}