{ "best_metric": null, "best_model_checkpoint": null, "epoch": 28.79581151832461, "global_step": 22000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.65, "learning_rate": 4.94e-05, "loss": 6.5844, "step": 500 }, { "epoch": 0.65, "eval_loss": 2.985063076019287, "eval_runtime": 66.7207, "eval_samples_per_second": 15.348, "eval_steps_per_second": 1.918, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.31, "learning_rate": 9.94e-05, "loss": 2.1717, "step": 1000 }, { "epoch": 1.31, "eval_loss": 1.0769672393798828, "eval_runtime": 66.0484, "eval_samples_per_second": 15.504, "eval_steps_per_second": 1.938, "eval_wer": 0.8259404828747895, "step": 1000 }, { "epoch": 1.96, "learning_rate": 9.774635036496352e-05, "loss": 1.0058, "step": 1500 }, { "epoch": 1.96, "eval_loss": 0.7441346049308777, "eval_runtime": 66.5848, "eval_samples_per_second": 15.379, "eval_steps_per_second": 1.922, "eval_wer": 0.6882650196518809, "step": 1500 }, { "epoch": 2.62, "learning_rate": 9.546532846715329e-05, "loss": 0.7316, "step": 2000 }, { "epoch": 2.62, "eval_loss": 0.6406537294387817, "eval_runtime": 65.9141, "eval_samples_per_second": 15.535, "eval_steps_per_second": 1.942, "eval_wer": 0.617630544637844, "step": 2000 }, { "epoch": 3.27, "learning_rate": 9.318430656934307e-05, "loss": 0.6249, "step": 2500 }, { "epoch": 3.27, "eval_loss": 0.5814340114593506, "eval_runtime": 66.4954, "eval_samples_per_second": 15.4, "eval_steps_per_second": 1.925, "eval_wer": 0.5783267827063447, "step": 2500 }, { "epoch": 3.93, "learning_rate": 9.090328467153285e-05, "loss": 0.5541, "step": 3000 }, { "epoch": 3.93, "eval_loss": 0.5387497544288635, "eval_runtime": 66.9564, "eval_samples_per_second": 15.294, "eval_steps_per_second": 1.912, "eval_wer": 0.5449747332959012, "step": 3000 }, { "epoch": 4.58, "learning_rate": 8.862226277372263e-05, "loss": 0.4727, "step": 3500 }, { "epoch": 4.58, "eval_loss": 0.5657380223274231, "eval_runtime": 66.1646, "eval_samples_per_second": 15.477, "eval_steps_per_second": 1.935, "eval_wer": 0.5357664233576642, "step": 3500 }, { "epoch": 5.24, "learning_rate": 8.634124087591242e-05, "loss": 0.4444, "step": 4000 }, { "epoch": 5.24, "eval_loss": 0.5406317710876465, "eval_runtime": 65.9956, "eval_samples_per_second": 15.516, "eval_steps_per_second": 1.94, "eval_wer": 0.5164514317798989, "step": 4000 }, { "epoch": 5.89, "learning_rate": 8.406021897810219e-05, "loss": 0.4057, "step": 4500 }, { "epoch": 5.89, "eval_loss": 0.5271017551422119, "eval_runtime": 66.6343, "eval_samples_per_second": 15.367, "eval_steps_per_second": 1.921, "eval_wer": 0.5116226838854576, "step": 4500 }, { "epoch": 6.54, "learning_rate": 8.177919708029196e-05, "loss": 0.3663, "step": 5000 }, { "epoch": 6.54, "eval_loss": 0.5105618238449097, "eval_runtime": 66.576, "eval_samples_per_second": 15.381, "eval_steps_per_second": 1.923, "eval_wer": 0.4793935991016283, "step": 5000 }, { "epoch": 7.2, "learning_rate": 7.949817518248176e-05, "loss": 0.3406, "step": 5500 }, { "epoch": 7.2, "eval_loss": 0.5662369132041931, "eval_runtime": 67.3476, "eval_samples_per_second": 15.205, "eval_steps_per_second": 1.901, "eval_wer": 0.48568220101066817, "step": 5500 }, { "epoch": 7.85, "learning_rate": 7.721715328467154e-05, "loss": 0.3142, "step": 6000 }, { "epoch": 7.85, "eval_loss": 0.5632680654525757, "eval_runtime": 66.2027, "eval_samples_per_second": 15.468, "eval_steps_per_second": 1.933, "eval_wer": 0.4879281302638967, "step": 6000 }, { "epoch": 8.51, "learning_rate": 7.493613138686131e-05, "loss": 0.3002, "step": 6500 }, { "epoch": 8.51, "eval_loss": 0.5218114852905273, "eval_runtime": 67.5136, "eval_samples_per_second": 15.167, "eval_steps_per_second": 1.896, "eval_wer": 0.45659741718135877, "step": 6500 }, { "epoch": 9.16, "learning_rate": 7.265967153284671e-05, "loss": 0.2845, "step": 7000 }, { "epoch": 9.16, "eval_loss": 0.5436348915100098, "eval_runtime": 66.2104, "eval_samples_per_second": 15.466, "eval_steps_per_second": 1.933, "eval_wer": 0.45457608085345313, "step": 7000 }, { "epoch": 9.82, "learning_rate": 7.038321167883212e-05, "loss": 0.2603, "step": 7500 }, { "epoch": 9.82, "eval_loss": 0.5183758735656738, "eval_runtime": 67.776, "eval_samples_per_second": 15.109, "eval_steps_per_second": 1.889, "eval_wer": 0.4415496911847277, "step": 7500 }, { "epoch": 10.47, "learning_rate": 6.810218978102189e-05, "loss": 0.2438, "step": 8000 }, { "epoch": 10.47, "eval_loss": 0.5246397256851196, "eval_runtime": 66.5435, "eval_samples_per_second": 15.388, "eval_steps_per_second": 1.924, "eval_wer": 0.43638405390230206, "step": 8000 }, { "epoch": 11.13, "learning_rate": 6.582116788321169e-05, "loss": 0.2396, "step": 8500 }, { "epoch": 11.13, "eval_loss": 0.5663778781890869, "eval_runtime": 66.6323, "eval_samples_per_second": 15.368, "eval_steps_per_second": 1.921, "eval_wer": 0.4428972487366648, "step": 8500 }, { "epoch": 11.78, "learning_rate": 6.354014598540147e-05, "loss": 0.2229, "step": 9000 }, { "epoch": 11.78, "eval_loss": 0.5550942420959473, "eval_runtime": 65.2937, "eval_samples_per_second": 15.683, "eval_steps_per_second": 1.96, "eval_wer": 0.4415496911847277, "step": 9000 }, { "epoch": 12.43, "learning_rate": 6.125912408759124e-05, "loss": 0.2088, "step": 9500 }, { "epoch": 12.43, "eval_loss": 0.5503721833229065, "eval_runtime": 66.2676, "eval_samples_per_second": 15.453, "eval_steps_per_second": 1.932, "eval_wer": 0.4257158899494666, "step": 9500 }, { "epoch": 13.09, "learning_rate": 5.897810218978103e-05, "loss": 0.2042, "step": 10000 }, { "epoch": 13.09, "eval_loss": 0.5646582245826721, "eval_runtime": 66.4465, "eval_samples_per_second": 15.411, "eval_steps_per_second": 1.926, "eval_wer": 0.4398652442448063, "step": 10000 }, { "epoch": 13.74, "learning_rate": 5.669708029197081e-05, "loss": 0.1975, "step": 10500 }, { "epoch": 13.74, "eval_loss": 0.5424100160598755, "eval_runtime": 66.9743, "eval_samples_per_second": 15.289, "eval_steps_per_second": 1.911, "eval_wer": 0.4186412128017967, "step": 10500 }, { "epoch": 14.4, "learning_rate": 5.441605839416058e-05, "loss": 0.1856, "step": 11000 }, { "epoch": 14.4, "eval_loss": 0.5583313703536987, "eval_runtime": 66.5902, "eval_samples_per_second": 15.378, "eval_steps_per_second": 1.922, "eval_wer": 0.42515440763615947, "step": 11000 }, { "epoch": 15.05, "learning_rate": 5.213503649635036e-05, "loss": 0.1872, "step": 11500 }, { "epoch": 15.05, "eval_loss": 0.6118334531784058, "eval_runtime": 66.3517, "eval_samples_per_second": 15.433, "eval_steps_per_second": 1.929, "eval_wer": 0.433464345873105, "step": 11500 }, { "epoch": 15.71, "learning_rate": 4.985401459854015e-05, "loss": 0.1676, "step": 12000 }, { "epoch": 15.71, "eval_loss": 0.6066869497299194, "eval_runtime": 67.2768, "eval_samples_per_second": 15.221, "eval_steps_per_second": 1.903, "eval_wer": 0.43627175743964064, "step": 12000 }, { "epoch": 16.36, "learning_rate": 4.757299270072993e-05, "loss": 0.1536, "step": 12500 }, { "epoch": 16.36, "eval_loss": 0.5462669730186462, "eval_runtime": 66.3968, "eval_samples_per_second": 15.422, "eval_steps_per_second": 1.928, "eval_wer": 0.412914093206064, "step": 12500 }, { "epoch": 17.02, "learning_rate": 4.5291970802919706e-05, "loss": 0.1582, "step": 13000 }, { "epoch": 17.02, "eval_loss": 0.5799742937088013, "eval_runtime": 65.9177, "eval_samples_per_second": 15.535, "eval_steps_per_second": 1.942, "eval_wer": 0.41021897810218977, "step": 13000 }, { "epoch": 17.67, "learning_rate": 4.3015510948905114e-05, "loss": 0.1429, "step": 13500 }, { "epoch": 17.67, "eval_loss": 0.5899107456207275, "eval_runtime": 66.1113, "eval_samples_per_second": 15.489, "eval_steps_per_second": 1.936, "eval_wer": 0.4150477259966311, "step": 13500 }, { "epoch": 18.32, "learning_rate": 4.0734489051094895e-05, "loss": 0.1451, "step": 14000 }, { "epoch": 18.32, "eval_loss": 0.6129721999168396, "eval_runtime": 67.4516, "eval_samples_per_second": 15.181, "eval_steps_per_second": 1.898, "eval_wer": 0.4138124649073554, "step": 14000 }, { "epoch": 18.98, "learning_rate": 3.845346715328467e-05, "loss": 0.1356, "step": 14500 }, { "epoch": 18.98, "eval_loss": 0.6040089130401611, "eval_runtime": 66.75, "eval_samples_per_second": 15.341, "eval_steps_per_second": 1.918, "eval_wer": 0.4123526108927569, "step": 14500 }, { "epoch": 19.63, "learning_rate": 3.617244525547445e-05, "loss": 0.134, "step": 15000 }, { "epoch": 19.63, "eval_loss": 0.5997006893157959, "eval_runtime": 67.809, "eval_samples_per_second": 15.101, "eval_steps_per_second": 1.888, "eval_wer": 0.40819764177428414, "step": 15000 }, { "epoch": 20.29, "learning_rate": 3.389598540145986e-05, "loss": 0.1305, "step": 15500 }, { "epoch": 20.29, "eval_loss": 0.5769977569580078, "eval_runtime": 66.9982, "eval_samples_per_second": 15.284, "eval_steps_per_second": 1.91, "eval_wer": 0.40235822571588997, "step": 15500 }, { "epoch": 20.94, "learning_rate": 3.161496350364963e-05, "loss": 0.1222, "step": 16000 }, { "epoch": 20.94, "eval_loss": 0.5980133414268494, "eval_runtime": 66.1854, "eval_samples_per_second": 15.472, "eval_steps_per_second": 1.934, "eval_wer": 0.391577765300393, "step": 16000 }, { "epoch": 21.6, "learning_rate": 2.9333941605839417e-05, "loss": 0.1214, "step": 16500 }, { "epoch": 21.6, "eval_loss": 0.5917083024978638, "eval_runtime": 67.5557, "eval_samples_per_second": 15.158, "eval_steps_per_second": 1.895, "eval_wer": 0.4005614823133071, "step": 16500 }, { "epoch": 22.25, "learning_rate": 2.70529197080292e-05, "loss": 0.1184, "step": 17000 }, { "epoch": 22.25, "eval_loss": 0.578889787197113, "eval_runtime": 66.5407, "eval_samples_per_second": 15.389, "eval_steps_per_second": 1.924, "eval_wer": 0.39460976979225154, "step": 17000 }, { "epoch": 22.91, "learning_rate": 2.477189781021898e-05, "loss": 0.1132, "step": 17500 }, { "epoch": 22.91, "eval_loss": 0.5992549657821655, "eval_runtime": 66.4759, "eval_samples_per_second": 15.404, "eval_steps_per_second": 1.926, "eval_wer": 0.3892195395845031, "step": 17500 }, { "epoch": 23.56, "learning_rate": 2.249087591240876e-05, "loss": 0.1052, "step": 18000 }, { "epoch": 23.56, "eval_loss": 0.5946719646453857, "eval_runtime": 66.8036, "eval_samples_per_second": 15.329, "eval_steps_per_second": 1.916, "eval_wer": 0.3889949466591802, "step": 18000 }, { "epoch": 24.21, "learning_rate": 2.0209854014598544e-05, "loss": 0.113, "step": 18500 }, { "epoch": 24.21, "eval_loss": 0.6009370684623718, "eval_runtime": 65.8523, "eval_samples_per_second": 15.55, "eval_steps_per_second": 1.944, "eval_wer": 0.3841661987647389, "step": 18500 }, { "epoch": 24.87, "learning_rate": 1.792883211678832e-05, "loss": 0.1021, "step": 19000 }, { "epoch": 24.87, "eval_loss": 0.6255775690078735, "eval_runtime": 67.7689, "eval_samples_per_second": 15.11, "eval_steps_per_second": 1.889, "eval_wer": 0.3841661987647389, "step": 19000 }, { "epoch": 25.52, "learning_rate": 1.5652372262773723e-05, "loss": 0.0966, "step": 19500 }, { "epoch": 25.52, "eval_loss": 0.6213188171386719, "eval_runtime": 68.5057, "eval_samples_per_second": 14.948, "eval_steps_per_second": 1.868, "eval_wer": 0.3869736103312746, "step": 19500 }, { "epoch": 26.18, "learning_rate": 1.3371350364963504e-05, "loss": 0.0981, "step": 20000 }, { "epoch": 26.18, "eval_loss": 0.6331676244735718, "eval_runtime": 67.2276, "eval_samples_per_second": 15.232, "eval_steps_per_second": 1.904, "eval_wer": 0.37866367209432905, "step": 20000 }, { "epoch": 26.83, "learning_rate": 1.1090328467153285e-05, "loss": 0.0983, "step": 20500 }, { "epoch": 26.83, "eval_loss": 0.6225576996803284, "eval_runtime": 66.5907, "eval_samples_per_second": 15.378, "eval_steps_per_second": 1.922, "eval_wer": 0.3806850084222347, "step": 20500 }, { "epoch": 27.49, "learning_rate": 8.809306569343067e-06, "loss": 0.0915, "step": 21000 }, { "epoch": 27.49, "eval_loss": 0.6176589727401733, "eval_runtime": 66.1318, "eval_samples_per_second": 15.484, "eval_steps_per_second": 1.936, "eval_wer": 0.3785513756316676, "step": 21000 }, { "epoch": 28.14, "learning_rate": 6.532846715328468e-06, "loss": 0.0898, "step": 21500 }, { "epoch": 28.14, "eval_loss": 0.6252412796020508, "eval_runtime": 66.2396, "eval_samples_per_second": 15.459, "eval_steps_per_second": 1.932, "eval_wer": 0.3828186412128018, "step": 21500 }, { "epoch": 28.8, "learning_rate": 4.251824817518248e-06, "loss": 0.0916, "step": 22000 }, { "epoch": 28.8, "eval_loss": 0.6163517832756042, "eval_runtime": 66.1166, "eval_samples_per_second": 15.488, "eval_steps_per_second": 1.936, "eval_wer": 0.3760808534531162, "step": 22000 } ], "max_steps": 22920, "num_train_epochs": 30, "total_flos": 6.988254857647251e+18, "trial_name": null, "trial_params": null }