diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..476ba9664e2127bbe9f4e1d383f90c2eaa59f020 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,12 @@ +wav2vec2_esp_15h/checkpoint-22500/optimizer.pt filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_15h/checkpoint-22500/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_15h/checkpoint-22000/optimizer.pt filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_15h/checkpoint-22000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_5h/checkpoint-8000/optimizer.pt filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_5h/checkpoint-8000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_5h/checkpoint-8500/optimizer.pt filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_5h/checkpoint-8500/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_30h/checkpoint-44000/optimizer.pt filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_30h/checkpoint-44000/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_30h/checkpoint-43500/optimizer.pt filter=lfs diff=lfs merge=lfs -text +wav2vec2_esp_30h/checkpoint-43500/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text diff --git a/wav2vec2_esp_15h/checkpoint-22000/config.json b/wav2vec2_esp_15h/checkpoint-22000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..647caeba97229b51ef3269ab43fbe635ecb4e3be --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22000/config.json @@ -0,0 +1,117 @@ +{ + "_name_or_path": "facebook/wav2vec2-base", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "model_type": "wav2vec2", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 37, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.17.0", + "use_weighted_layer_sum": false, + "vocab_size": 38, + "xvector_output_dim": 512 +} diff --git a/wav2vec2_esp_15h/checkpoint-22000/optimizer.pt b/wav2vec2_esp_15h/checkpoint-22000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e22919f055b6b46ffce6cf60c6335ad09e6695e0 --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60486088c50cd0525ab854a55277c2dc80e2e8d5c5c76645332befaa06db0358 +size 721723525 diff --git a/wav2vec2_esp_15h/checkpoint-22000/preprocessor_config.json b/wav2vec2_esp_15h/checkpoint-22000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73caa151574001d3d495fae897e1d38968249712 --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22000/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wav2vec2_esp_15h/checkpoint-22000/pytorch_model.bin b/wav2vec2_esp_15h/checkpoint-22000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e585b38d7513841af263770d21a1f0f1cb84bef6 --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627c8266a4f9c844c240dce1a26a3b0daf5c09696c12507ea040b8387fc75134 +size 377691873 diff --git a/wav2vec2_esp_15h/checkpoint-22000/rng_state.pth b/wav2vec2_esp_15h/checkpoint-22000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb34e536f331351cd38f1a501b96ad9d087d7545 Binary files /dev/null and b/wav2vec2_esp_15h/checkpoint-22000/rng_state.pth differ diff --git a/wav2vec2_esp_15h/checkpoint-22000/scaler.pt b/wav2vec2_esp_15h/checkpoint-22000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b9b9465464991ac95fe23e2d75e6352cccc8ab3 Binary files /dev/null and b/wav2vec2_esp_15h/checkpoint-22000/scaler.pt differ diff --git a/wav2vec2_esp_15h/checkpoint-22000/scheduler.pt b/wav2vec2_esp_15h/checkpoint-22000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..963e08b2f158be5af84a0a690a1d6b62e17c3a5c Binary files /dev/null and b/wav2vec2_esp_15h/checkpoint-22000/scheduler.pt differ diff --git a/wav2vec2_esp_15h/checkpoint-22000/trainer_state.json b/wav2vec2_esp_15h/checkpoint-22000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..22ef4142684017e9e2f64a2be308386570729151 --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22000/trainer_state.json @@ -0,0 +1,676 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 28.79581151832461, + "global_step": 22000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.65, + "learning_rate": 4.94e-05, + "loss": 6.5844, + "step": 500 + }, + { + "epoch": 0.65, + "eval_loss": 2.985063076019287, + "eval_runtime": 66.7207, + "eval_samples_per_second": 15.348, + "eval_steps_per_second": 1.918, + "eval_wer": 1.0, + "step": 500 + }, + { + "epoch": 1.31, + "learning_rate": 9.94e-05, + "loss": 2.1717, + "step": 1000 + }, + { + "epoch": 1.31, + "eval_loss": 1.0769672393798828, + "eval_runtime": 66.0484, + "eval_samples_per_second": 15.504, + "eval_steps_per_second": 1.938, + "eval_wer": 0.8259404828747895, + "step": 1000 + }, + { + "epoch": 1.96, + "learning_rate": 9.774635036496352e-05, + "loss": 1.0058, + "step": 1500 + }, + { + "epoch": 1.96, + "eval_loss": 0.7441346049308777, + "eval_runtime": 66.5848, + "eval_samples_per_second": 15.379, + "eval_steps_per_second": 1.922, + "eval_wer": 0.6882650196518809, + "step": 1500 + }, + { + "epoch": 2.62, + "learning_rate": 9.546532846715329e-05, + "loss": 0.7316, + "step": 2000 + }, + { + "epoch": 2.62, + "eval_loss": 0.6406537294387817, + "eval_runtime": 65.9141, + "eval_samples_per_second": 15.535, + "eval_steps_per_second": 1.942, + "eval_wer": 0.617630544637844, + "step": 2000 + }, + { + "epoch": 3.27, + "learning_rate": 9.318430656934307e-05, + "loss": 0.6249, + "step": 2500 + }, + { + "epoch": 3.27, + "eval_loss": 0.5814340114593506, + "eval_runtime": 66.4954, + "eval_samples_per_second": 15.4, + "eval_steps_per_second": 1.925, + "eval_wer": 0.5783267827063447, + "step": 2500 + }, + { + "epoch": 3.93, + "learning_rate": 9.090328467153285e-05, + "loss": 0.5541, + "step": 3000 + }, + { + "epoch": 3.93, + "eval_loss": 0.5387497544288635, + "eval_runtime": 66.9564, + "eval_samples_per_second": 15.294, + "eval_steps_per_second": 1.912, + "eval_wer": 0.5449747332959012, + "step": 3000 + }, + { + "epoch": 4.58, + "learning_rate": 8.862226277372263e-05, + "loss": 0.4727, + "step": 3500 + }, + { + "epoch": 4.58, + "eval_loss": 0.5657380223274231, + "eval_runtime": 66.1646, + "eval_samples_per_second": 15.477, + "eval_steps_per_second": 1.935, + "eval_wer": 0.5357664233576642, + "step": 3500 + }, + { + "epoch": 5.24, + "learning_rate": 8.634124087591242e-05, + "loss": 0.4444, + "step": 4000 + }, + { + "epoch": 5.24, + "eval_loss": 0.5406317710876465, + "eval_runtime": 65.9956, + "eval_samples_per_second": 15.516, + "eval_steps_per_second": 1.94, + "eval_wer": 0.5164514317798989, + "step": 4000 + }, + { + "epoch": 5.89, + "learning_rate": 8.406021897810219e-05, + "loss": 0.4057, + "step": 4500 + }, + { + "epoch": 5.89, + "eval_loss": 0.5271017551422119, + "eval_runtime": 66.6343, + "eval_samples_per_second": 15.367, + "eval_steps_per_second": 1.921, + "eval_wer": 0.5116226838854576, + "step": 4500 + }, + { + "epoch": 6.54, + "learning_rate": 8.177919708029196e-05, + "loss": 0.3663, + "step": 5000 + }, + { + "epoch": 6.54, + "eval_loss": 0.5105618238449097, + "eval_runtime": 66.576, + "eval_samples_per_second": 15.381, + "eval_steps_per_second": 1.923, + "eval_wer": 0.4793935991016283, + "step": 5000 + }, + { + "epoch": 7.2, + "learning_rate": 7.949817518248176e-05, + "loss": 0.3406, + "step": 5500 + }, + { + "epoch": 7.2, + "eval_loss": 0.5662369132041931, + "eval_runtime": 67.3476, + "eval_samples_per_second": 15.205, + "eval_steps_per_second": 1.901, + "eval_wer": 0.48568220101066817, + "step": 5500 + }, + { + "epoch": 7.85, + "learning_rate": 7.721715328467154e-05, + "loss": 0.3142, + "step": 6000 + }, + { + "epoch": 7.85, + "eval_loss": 0.5632680654525757, + "eval_runtime": 66.2027, + "eval_samples_per_second": 15.468, + "eval_steps_per_second": 1.933, + "eval_wer": 0.4879281302638967, + "step": 6000 + }, + { + "epoch": 8.51, + "learning_rate": 7.493613138686131e-05, + "loss": 0.3002, + "step": 6500 + }, + { + "epoch": 8.51, + "eval_loss": 0.5218114852905273, + "eval_runtime": 67.5136, + "eval_samples_per_second": 15.167, + "eval_steps_per_second": 1.896, + "eval_wer": 0.45659741718135877, + "step": 6500 + }, + { + "epoch": 9.16, + "learning_rate": 7.265967153284671e-05, + "loss": 0.2845, + "step": 7000 + }, + { + "epoch": 9.16, + "eval_loss": 0.5436348915100098, + "eval_runtime": 66.2104, + "eval_samples_per_second": 15.466, + "eval_steps_per_second": 1.933, + "eval_wer": 0.45457608085345313, + "step": 7000 + }, + { + "epoch": 9.82, + "learning_rate": 7.038321167883212e-05, + "loss": 0.2603, + "step": 7500 + }, + { + "epoch": 9.82, + "eval_loss": 0.5183758735656738, + "eval_runtime": 67.776, + "eval_samples_per_second": 15.109, + "eval_steps_per_second": 1.889, + "eval_wer": 0.4415496911847277, + "step": 7500 + }, + { + "epoch": 10.47, + "learning_rate": 6.810218978102189e-05, + "loss": 0.2438, + "step": 8000 + }, + { + "epoch": 10.47, + "eval_loss": 0.5246397256851196, + "eval_runtime": 66.5435, + "eval_samples_per_second": 15.388, + "eval_steps_per_second": 1.924, + "eval_wer": 0.43638405390230206, + "step": 8000 + }, + { + "epoch": 11.13, + "learning_rate": 6.582116788321169e-05, + "loss": 0.2396, + "step": 8500 + }, + { + "epoch": 11.13, + "eval_loss": 0.5663778781890869, + "eval_runtime": 66.6323, + "eval_samples_per_second": 15.368, + "eval_steps_per_second": 1.921, + "eval_wer": 0.4428972487366648, + "step": 8500 + }, + { + "epoch": 11.78, + "learning_rate": 6.354014598540147e-05, + "loss": 0.2229, + "step": 9000 + }, + { + "epoch": 11.78, + "eval_loss": 0.5550942420959473, + "eval_runtime": 65.2937, + "eval_samples_per_second": 15.683, + "eval_steps_per_second": 1.96, + "eval_wer": 0.4415496911847277, + "step": 9000 + }, + { + "epoch": 12.43, + "learning_rate": 6.125912408759124e-05, + "loss": 0.2088, + "step": 9500 + }, + { + "epoch": 12.43, + "eval_loss": 0.5503721833229065, + "eval_runtime": 66.2676, + "eval_samples_per_second": 15.453, + "eval_steps_per_second": 1.932, + "eval_wer": 0.4257158899494666, + "step": 9500 + }, + { + "epoch": 13.09, + "learning_rate": 5.897810218978103e-05, + "loss": 0.2042, + "step": 10000 + }, + { + "epoch": 13.09, + "eval_loss": 0.5646582245826721, + "eval_runtime": 66.4465, + "eval_samples_per_second": 15.411, + "eval_steps_per_second": 1.926, + "eval_wer": 0.4398652442448063, + "step": 10000 + }, + { + "epoch": 13.74, + "learning_rate": 5.669708029197081e-05, + "loss": 0.1975, + "step": 10500 + }, + { + "epoch": 13.74, + "eval_loss": 0.5424100160598755, + "eval_runtime": 66.9743, + "eval_samples_per_second": 15.289, + "eval_steps_per_second": 1.911, + "eval_wer": 0.4186412128017967, + "step": 10500 + }, + { + "epoch": 14.4, + "learning_rate": 5.441605839416058e-05, + "loss": 0.1856, + "step": 11000 + }, + { + "epoch": 14.4, + "eval_loss": 0.5583313703536987, + "eval_runtime": 66.5902, + "eval_samples_per_second": 15.378, + "eval_steps_per_second": 1.922, + "eval_wer": 0.42515440763615947, + "step": 11000 + }, + { + "epoch": 15.05, + "learning_rate": 5.213503649635036e-05, + "loss": 0.1872, + "step": 11500 + }, + { + "epoch": 15.05, + "eval_loss": 0.6118334531784058, + "eval_runtime": 66.3517, + "eval_samples_per_second": 15.433, + "eval_steps_per_second": 1.929, + "eval_wer": 0.433464345873105, + "step": 11500 + }, + { + "epoch": 15.71, + "learning_rate": 4.985401459854015e-05, + "loss": 0.1676, + "step": 12000 + }, + { + "epoch": 15.71, + "eval_loss": 0.6066869497299194, + "eval_runtime": 67.2768, + "eval_samples_per_second": 15.221, + "eval_steps_per_second": 1.903, + "eval_wer": 0.43627175743964064, + "step": 12000 + }, + { + "epoch": 16.36, + "learning_rate": 4.757299270072993e-05, + "loss": 0.1536, + "step": 12500 + }, + { + "epoch": 16.36, + "eval_loss": 0.5462669730186462, + "eval_runtime": 66.3968, + "eval_samples_per_second": 15.422, + "eval_steps_per_second": 1.928, + "eval_wer": 0.412914093206064, + "step": 12500 + }, + { + "epoch": 17.02, + "learning_rate": 4.5291970802919706e-05, + "loss": 0.1582, + "step": 13000 + }, + { + "epoch": 17.02, + "eval_loss": 0.5799742937088013, + "eval_runtime": 65.9177, + "eval_samples_per_second": 15.535, + "eval_steps_per_second": 1.942, + "eval_wer": 0.41021897810218977, + "step": 13000 + }, + { + "epoch": 17.67, + "learning_rate": 4.3015510948905114e-05, + "loss": 0.1429, + "step": 13500 + }, + { + "epoch": 17.67, + "eval_loss": 0.5899107456207275, + "eval_runtime": 66.1113, + "eval_samples_per_second": 15.489, + "eval_steps_per_second": 1.936, + "eval_wer": 0.4150477259966311, + "step": 13500 + }, + { + "epoch": 18.32, + "learning_rate": 4.0734489051094895e-05, + "loss": 0.1451, + "step": 14000 + }, + { + "epoch": 18.32, + "eval_loss": 0.6129721999168396, + "eval_runtime": 67.4516, + "eval_samples_per_second": 15.181, + "eval_steps_per_second": 1.898, + "eval_wer": 0.4138124649073554, + "step": 14000 + }, + { + "epoch": 18.98, + "learning_rate": 3.845346715328467e-05, + "loss": 0.1356, + "step": 14500 + }, + { + "epoch": 18.98, + "eval_loss": 0.6040089130401611, + "eval_runtime": 66.75, + "eval_samples_per_second": 15.341, + "eval_steps_per_second": 1.918, + "eval_wer": 0.4123526108927569, + "step": 14500 + }, + { + "epoch": 19.63, + "learning_rate": 3.617244525547445e-05, + "loss": 0.134, + "step": 15000 + }, + { + "epoch": 19.63, + "eval_loss": 0.5997006893157959, + "eval_runtime": 67.809, + "eval_samples_per_second": 15.101, + "eval_steps_per_second": 1.888, + "eval_wer": 0.40819764177428414, + "step": 15000 + }, + { + "epoch": 20.29, + "learning_rate": 3.389598540145986e-05, + "loss": 0.1305, + "step": 15500 + }, + { + "epoch": 20.29, + "eval_loss": 0.5769977569580078, + "eval_runtime": 66.9982, + "eval_samples_per_second": 15.284, + "eval_steps_per_second": 1.91, + "eval_wer": 0.40235822571588997, + "step": 15500 + }, + { + "epoch": 20.94, + "learning_rate": 3.161496350364963e-05, + "loss": 0.1222, + "step": 16000 + }, + { + "epoch": 20.94, + "eval_loss": 0.5980133414268494, + "eval_runtime": 66.1854, + "eval_samples_per_second": 15.472, + "eval_steps_per_second": 1.934, + "eval_wer": 0.391577765300393, + "step": 16000 + }, + { + "epoch": 21.6, + "learning_rate": 2.9333941605839417e-05, + "loss": 0.1214, + "step": 16500 + }, + { + "epoch": 21.6, + "eval_loss": 0.5917083024978638, + "eval_runtime": 67.5557, + "eval_samples_per_second": 15.158, + "eval_steps_per_second": 1.895, + "eval_wer": 0.4005614823133071, + "step": 16500 + }, + { + "epoch": 22.25, + "learning_rate": 2.70529197080292e-05, + "loss": 0.1184, + "step": 17000 + }, + { + "epoch": 22.25, + "eval_loss": 0.578889787197113, + "eval_runtime": 66.5407, + "eval_samples_per_second": 15.389, + "eval_steps_per_second": 1.924, + "eval_wer": 0.39460976979225154, + "step": 17000 + }, + { + "epoch": 22.91, + "learning_rate": 2.477189781021898e-05, + "loss": 0.1132, + "step": 17500 + }, + { + "epoch": 22.91, + "eval_loss": 0.5992549657821655, + "eval_runtime": 66.4759, + "eval_samples_per_second": 15.404, + "eval_steps_per_second": 1.926, + "eval_wer": 0.3892195395845031, + "step": 17500 + }, + { + "epoch": 23.56, + "learning_rate": 2.249087591240876e-05, + "loss": 0.1052, + "step": 18000 + }, + { + "epoch": 23.56, + "eval_loss": 0.5946719646453857, + "eval_runtime": 66.8036, + "eval_samples_per_second": 15.329, + "eval_steps_per_second": 1.916, + "eval_wer": 0.3889949466591802, + "step": 18000 + }, + { + "epoch": 24.21, + "learning_rate": 2.0209854014598544e-05, + "loss": 0.113, + "step": 18500 + }, + { + "epoch": 24.21, + "eval_loss": 0.6009370684623718, + "eval_runtime": 65.8523, + "eval_samples_per_second": 15.55, + "eval_steps_per_second": 1.944, + "eval_wer": 0.3841661987647389, + "step": 18500 + }, + { + "epoch": 24.87, + "learning_rate": 1.792883211678832e-05, + "loss": 0.1021, + "step": 19000 + }, + { + "epoch": 24.87, + "eval_loss": 0.6255775690078735, + "eval_runtime": 67.7689, + "eval_samples_per_second": 15.11, + "eval_steps_per_second": 1.889, + "eval_wer": 0.3841661987647389, + "step": 19000 + }, + { + "epoch": 25.52, + "learning_rate": 1.5652372262773723e-05, + "loss": 0.0966, + "step": 19500 + }, + { + "epoch": 25.52, + "eval_loss": 0.6213188171386719, + "eval_runtime": 68.5057, + "eval_samples_per_second": 14.948, + "eval_steps_per_second": 1.868, + "eval_wer": 0.3869736103312746, + "step": 19500 + }, + { + "epoch": 26.18, + "learning_rate": 1.3371350364963504e-05, + "loss": 0.0981, + "step": 20000 + }, + { + "epoch": 26.18, + "eval_loss": 0.6331676244735718, + "eval_runtime": 67.2276, + "eval_samples_per_second": 15.232, + "eval_steps_per_second": 1.904, + "eval_wer": 0.37866367209432905, + "step": 20000 + }, + { + "epoch": 26.83, + "learning_rate": 1.1090328467153285e-05, + "loss": 0.0983, + "step": 20500 + }, + { + "epoch": 26.83, + "eval_loss": 0.6225576996803284, + "eval_runtime": 66.5907, + "eval_samples_per_second": 15.378, + "eval_steps_per_second": 1.922, + "eval_wer": 0.3806850084222347, + "step": 20500 + }, + { + "epoch": 27.49, + "learning_rate": 8.809306569343067e-06, + "loss": 0.0915, + "step": 21000 + }, + { + "epoch": 27.49, + "eval_loss": 0.6176589727401733, + "eval_runtime": 66.1318, + "eval_samples_per_second": 15.484, + "eval_steps_per_second": 1.936, + "eval_wer": 0.3785513756316676, + "step": 21000 + }, + { + "epoch": 28.14, + "learning_rate": 6.532846715328468e-06, + "loss": 0.0898, + "step": 21500 + }, + { + "epoch": 28.14, + "eval_loss": 0.6252412796020508, + "eval_runtime": 66.2396, + "eval_samples_per_second": 15.459, + "eval_steps_per_second": 1.932, + "eval_wer": 0.3828186412128018, + "step": 21500 + }, + { + "epoch": 28.8, + "learning_rate": 4.251824817518248e-06, + "loss": 0.0916, + "step": 22000 + }, + { + "epoch": 28.8, + "eval_loss": 0.6163517832756042, + "eval_runtime": 66.1166, + "eval_samples_per_second": 15.488, + "eval_steps_per_second": 1.936, + "eval_wer": 0.3760808534531162, + "step": 22000 + } + ], + "max_steps": 22920, + "num_train_epochs": 30, + "total_flos": 6.988254857647251e+18, + "trial_name": null, + "trial_params": null +} diff --git a/wav2vec2_esp_15h/checkpoint-22000/training_args.bin b/wav2vec2_esp_15h/checkpoint-22000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2e6daefc0b769b8a00a52a5abec8cceafabb368 Binary files /dev/null and b/wav2vec2_esp_15h/checkpoint-22000/training_args.bin differ diff --git a/wav2vec2_esp_15h/checkpoint-22500/config.json b/wav2vec2_esp_15h/checkpoint-22500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..647caeba97229b51ef3269ab43fbe635ecb4e3be --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22500/config.json @@ -0,0 +1,117 @@ +{ + "_name_or_path": "facebook/wav2vec2-base", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "model_type": "wav2vec2", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 37, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.17.0", + "use_weighted_layer_sum": false, + "vocab_size": 38, + "xvector_output_dim": 512 +} diff --git a/wav2vec2_esp_15h/checkpoint-22500/optimizer.pt b/wav2vec2_esp_15h/checkpoint-22500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..af89c3e8ebe9dd6f0a83e8629625494d7dfe2b99 --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:818ef3786510bd9b85118a60afb89c3e4c7ddb35ab3e16998f83ab3e5823033a +size 721723525 diff --git a/wav2vec2_esp_15h/checkpoint-22500/preprocessor_config.json b/wav2vec2_esp_15h/checkpoint-22500/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73caa151574001d3d495fae897e1d38968249712 --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22500/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wav2vec2_esp_15h/checkpoint-22500/pytorch_model.bin b/wav2vec2_esp_15h/checkpoint-22500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f81e6c9ede59a8f7c89c8bc2f2757a02fc6e3df3 --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a06b11c51f693b8bcac67bc3aaafb31d974e586dfe30cd4ae09654564ab86df +size 377691873 diff --git a/wav2vec2_esp_15h/checkpoint-22500/rng_state.pth b/wav2vec2_esp_15h/checkpoint-22500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..30b751b66d742e03a0aa243a1a3baa4467fafc89 Binary files /dev/null and b/wav2vec2_esp_15h/checkpoint-22500/rng_state.pth differ diff --git a/wav2vec2_esp_15h/checkpoint-22500/scaler.pt b/wav2vec2_esp_15h/checkpoint-22500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..903ca36aa95be662afe9466c56d7a62db21e8a76 Binary files /dev/null and b/wav2vec2_esp_15h/checkpoint-22500/scaler.pt differ diff --git a/wav2vec2_esp_15h/checkpoint-22500/scheduler.pt b/wav2vec2_esp_15h/checkpoint-22500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f99f227194b8695547c89987d53322339cbf9417 Binary files /dev/null and b/wav2vec2_esp_15h/checkpoint-22500/scheduler.pt differ diff --git a/wav2vec2_esp_15h/checkpoint-22500/trainer_state.json b/wav2vec2_esp_15h/checkpoint-22500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..99e35e430b4bf6ea3431104e8194b818d255f81c --- /dev/null +++ b/wav2vec2_esp_15h/checkpoint-22500/trainer_state.json @@ -0,0 +1,691 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.45026178010471, + "global_step": 22500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.65, + "learning_rate": 4.94e-05, + "loss": 6.5844, + "step": 500 + }, + { + "epoch": 0.65, + "eval_loss": 2.985063076019287, + "eval_runtime": 66.7207, + "eval_samples_per_second": 15.348, + "eval_steps_per_second": 1.918, + "eval_wer": 1.0, + "step": 500 + }, + { + "epoch": 1.31, + "learning_rate": 9.94e-05, + "loss": 2.1717, + "step": 1000 + }, + { + "epoch": 1.31, + "eval_loss": 1.0769672393798828, + "eval_runtime": 66.0484, + "eval_samples_per_second": 15.504, + "eval_steps_per_second": 1.938, + "eval_wer": 0.8259404828747895, + "step": 1000 + }, + { + "epoch": 1.96, + "learning_rate": 9.774635036496352e-05, + "loss": 1.0058, + "step": 1500 + }, + { + "epoch": 1.96, + "eval_loss": 0.7441346049308777, + "eval_runtime": 66.5848, + "eval_samples_per_second": 15.379, + "eval_steps_per_second": 1.922, + "eval_wer": 0.6882650196518809, + "step": 1500 + }, + { + "epoch": 2.62, + "learning_rate": 9.546532846715329e-05, + "loss": 0.7316, + "step": 2000 + }, + { + "epoch": 2.62, + "eval_loss": 0.6406537294387817, + "eval_runtime": 65.9141, + "eval_samples_per_second": 15.535, + "eval_steps_per_second": 1.942, + "eval_wer": 0.617630544637844, + "step": 2000 + }, + { + "epoch": 3.27, + "learning_rate": 9.318430656934307e-05, + "loss": 0.6249, + "step": 2500 + }, + { + "epoch": 3.27, + "eval_loss": 0.5814340114593506, + "eval_runtime": 66.4954, + "eval_samples_per_second": 15.4, + "eval_steps_per_second": 1.925, + "eval_wer": 0.5783267827063447, + "step": 2500 + }, + { + "epoch": 3.93, + "learning_rate": 9.090328467153285e-05, + "loss": 0.5541, + "step": 3000 + }, + { + "epoch": 3.93, + "eval_loss": 0.5387497544288635, + "eval_runtime": 66.9564, + "eval_samples_per_second": 15.294, + "eval_steps_per_second": 1.912, + "eval_wer": 0.5449747332959012, + "step": 3000 + }, + { + "epoch": 4.58, + "learning_rate": 8.862226277372263e-05, + "loss": 0.4727, + "step": 3500 + }, + { + "epoch": 4.58, + "eval_loss": 0.5657380223274231, + "eval_runtime": 66.1646, + "eval_samples_per_second": 15.477, + "eval_steps_per_second": 1.935, + "eval_wer": 0.5357664233576642, + "step": 3500 + }, + { + "epoch": 5.24, + "learning_rate": 8.634124087591242e-05, + "loss": 0.4444, + "step": 4000 + }, + { + "epoch": 5.24, + "eval_loss": 0.5406317710876465, + "eval_runtime": 65.9956, + "eval_samples_per_second": 15.516, + "eval_steps_per_second": 1.94, + "eval_wer": 0.5164514317798989, + "step": 4000 + }, + { + "epoch": 5.89, + "learning_rate": 8.406021897810219e-05, + "loss": 0.4057, + "step": 4500 + }, + { + "epoch": 5.89, + "eval_loss": 0.5271017551422119, + "eval_runtime": 66.6343, + "eval_samples_per_second": 15.367, + "eval_steps_per_second": 1.921, + "eval_wer": 0.5116226838854576, + "step": 4500 + }, + { + "epoch": 6.54, + "learning_rate": 8.177919708029196e-05, + "loss": 0.3663, + "step": 5000 + }, + { + "epoch": 6.54, + "eval_loss": 0.5105618238449097, + "eval_runtime": 66.576, + "eval_samples_per_second": 15.381, + "eval_steps_per_second": 1.923, + "eval_wer": 0.4793935991016283, + "step": 5000 + }, + { + "epoch": 7.2, + "learning_rate": 7.949817518248176e-05, + "loss": 0.3406, + "step": 5500 + }, + { + "epoch": 7.2, + "eval_loss": 0.5662369132041931, + "eval_runtime": 67.3476, + "eval_samples_per_second": 15.205, + "eval_steps_per_second": 1.901, + "eval_wer": 0.48568220101066817, + "step": 5500 + }, + { + "epoch": 7.85, + "learning_rate": 7.721715328467154e-05, + "loss": 0.3142, + "step": 6000 + }, + { + "epoch": 7.85, + "eval_loss": 0.5632680654525757, + "eval_runtime": 66.2027, + "eval_samples_per_second": 15.468, + "eval_steps_per_second": 1.933, + "eval_wer": 0.4879281302638967, + "step": 6000 + }, + { + "epoch": 8.51, + "learning_rate": 7.493613138686131e-05, + "loss": 0.3002, + "step": 6500 + }, + { + "epoch": 8.51, + "eval_loss": 0.5218114852905273, + "eval_runtime": 67.5136, + "eval_samples_per_second": 15.167, + "eval_steps_per_second": 1.896, + "eval_wer": 0.45659741718135877, + "step": 6500 + }, + { + "epoch": 9.16, + "learning_rate": 7.265967153284671e-05, + "loss": 0.2845, + "step": 7000 + }, + { + "epoch": 9.16, + "eval_loss": 0.5436348915100098, + "eval_runtime": 66.2104, + "eval_samples_per_second": 15.466, + "eval_steps_per_second": 1.933, + "eval_wer": 0.45457608085345313, + "step": 7000 + }, + { + "epoch": 9.82, + "learning_rate": 7.038321167883212e-05, + "loss": 0.2603, + "step": 7500 + }, + { + "epoch": 9.82, + "eval_loss": 0.5183758735656738, + "eval_runtime": 67.776, + "eval_samples_per_second": 15.109, + "eval_steps_per_second": 1.889, + "eval_wer": 0.4415496911847277, + "step": 7500 + }, + { + "epoch": 10.47, + "learning_rate": 6.810218978102189e-05, + "loss": 0.2438, + "step": 8000 + }, + { + "epoch": 10.47, + "eval_loss": 0.5246397256851196, + "eval_runtime": 66.5435, + "eval_samples_per_second": 15.388, + "eval_steps_per_second": 1.924, + "eval_wer": 0.43638405390230206, + "step": 8000 + }, + { + "epoch": 11.13, + "learning_rate": 6.582116788321169e-05, + "loss": 0.2396, + "step": 8500 + }, + { + "epoch": 11.13, + "eval_loss": 0.5663778781890869, + "eval_runtime": 66.6323, + "eval_samples_per_second": 15.368, + "eval_steps_per_second": 1.921, + "eval_wer": 0.4428972487366648, + "step": 8500 + }, + { + "epoch": 11.78, + "learning_rate": 6.354014598540147e-05, + "loss": 0.2229, + "step": 9000 + }, + { + "epoch": 11.78, + "eval_loss": 0.5550942420959473, + "eval_runtime": 65.2937, + "eval_samples_per_second": 15.683, + "eval_steps_per_second": 1.96, + "eval_wer": 0.4415496911847277, + "step": 9000 + }, + { + "epoch": 12.43, + "learning_rate": 6.125912408759124e-05, + "loss": 0.2088, + "step": 9500 + }, + { + "epoch": 12.43, + "eval_loss": 0.5503721833229065, + "eval_runtime": 66.2676, + "eval_samples_per_second": 15.453, + "eval_steps_per_second": 1.932, + "eval_wer": 0.4257158899494666, + "step": 9500 + }, + { + "epoch": 13.09, + "learning_rate": 5.897810218978103e-05, + "loss": 0.2042, + "step": 10000 + }, + { + "epoch": 13.09, + "eval_loss": 0.5646582245826721, + "eval_runtime": 66.4465, + "eval_samples_per_second": 15.411, + "eval_steps_per_second": 1.926, + "eval_wer": 0.4398652442448063, + "step": 10000 + }, + { + "epoch": 13.74, + "learning_rate": 5.669708029197081e-05, + "loss": 0.1975, + "step": 10500 + }, + { + "epoch": 13.74, + "eval_loss": 0.5424100160598755, + "eval_runtime": 66.9743, + "eval_samples_per_second": 15.289, + "eval_steps_per_second": 1.911, + "eval_wer": 0.4186412128017967, + "step": 10500 + }, + { + "epoch": 14.4, + "learning_rate": 5.441605839416058e-05, + "loss": 0.1856, + "step": 11000 + }, + { + "epoch": 14.4, + "eval_loss": 0.5583313703536987, + "eval_runtime": 66.5902, + "eval_samples_per_second": 15.378, + "eval_steps_per_second": 1.922, + "eval_wer": 0.42515440763615947, + "step": 11000 + }, + { + "epoch": 15.05, + "learning_rate": 5.213503649635036e-05, + "loss": 0.1872, + "step": 11500 + }, + { + "epoch": 15.05, + "eval_loss": 0.6118334531784058, + "eval_runtime": 66.3517, + "eval_samples_per_second": 15.433, + "eval_steps_per_second": 1.929, + "eval_wer": 0.433464345873105, + "step": 11500 + }, + { + "epoch": 15.71, + "learning_rate": 4.985401459854015e-05, + "loss": 0.1676, + "step": 12000 + }, + { + "epoch": 15.71, + "eval_loss": 0.6066869497299194, + "eval_runtime": 67.2768, + "eval_samples_per_second": 15.221, + "eval_steps_per_second": 1.903, + "eval_wer": 0.43627175743964064, + "step": 12000 + }, + { + "epoch": 16.36, + "learning_rate": 4.757299270072993e-05, + "loss": 0.1536, + "step": 12500 + }, + { + "epoch": 16.36, + "eval_loss": 0.5462669730186462, + "eval_runtime": 66.3968, + "eval_samples_per_second": 15.422, + "eval_steps_per_second": 1.928, + "eval_wer": 0.412914093206064, + "step": 12500 + }, + { + "epoch": 17.02, + "learning_rate": 4.5291970802919706e-05, + "loss": 0.1582, + "step": 13000 + }, + { + "epoch": 17.02, + "eval_loss": 0.5799742937088013, + "eval_runtime": 65.9177, + "eval_samples_per_second": 15.535, + "eval_steps_per_second": 1.942, + "eval_wer": 0.41021897810218977, + "step": 13000 + }, + { + "epoch": 17.67, + "learning_rate": 4.3015510948905114e-05, + "loss": 0.1429, + "step": 13500 + }, + { + "epoch": 17.67, + "eval_loss": 0.5899107456207275, + "eval_runtime": 66.1113, + "eval_samples_per_second": 15.489, + "eval_steps_per_second": 1.936, + "eval_wer": 0.4150477259966311, + "step": 13500 + }, + { + "epoch": 18.32, + "learning_rate": 4.0734489051094895e-05, + "loss": 0.1451, + "step": 14000 + }, + { + "epoch": 18.32, + "eval_loss": 0.6129721999168396, + "eval_runtime": 67.4516, + "eval_samples_per_second": 15.181, + "eval_steps_per_second": 1.898, + "eval_wer": 0.4138124649073554, + "step": 14000 + }, + { + "epoch": 18.98, + "learning_rate": 3.845346715328467e-05, + "loss": 0.1356, + "step": 14500 + }, + { + "epoch": 18.98, + "eval_loss": 0.6040089130401611, + "eval_runtime": 66.75, + "eval_samples_per_second": 15.341, + "eval_steps_per_second": 1.918, + "eval_wer": 0.4123526108927569, + "step": 14500 + }, + { + "epoch": 19.63, + "learning_rate": 3.617244525547445e-05, + "loss": 0.134, + "step": 15000 + }, + { + "epoch": 19.63, + "eval_loss": 0.5997006893157959, + "eval_runtime": 67.809, + "eval_samples_per_second": 15.101, + "eval_steps_per_second": 1.888, + "eval_wer": 0.40819764177428414, + "step": 15000 + }, + { + "epoch": 20.29, + "learning_rate": 3.389598540145986e-05, + "loss": 0.1305, + "step": 15500 + }, + { + "epoch": 20.29, + "eval_loss": 0.5769977569580078, + "eval_runtime": 66.9982, + "eval_samples_per_second": 15.284, + "eval_steps_per_second": 1.91, + "eval_wer": 0.40235822571588997, + "step": 15500 + }, + { + "epoch": 20.94, + "learning_rate": 3.161496350364963e-05, + "loss": 0.1222, + "step": 16000 + }, + { + "epoch": 20.94, + "eval_loss": 0.5980133414268494, + "eval_runtime": 66.1854, + "eval_samples_per_second": 15.472, + "eval_steps_per_second": 1.934, + "eval_wer": 0.391577765300393, + "step": 16000 + }, + { + "epoch": 21.6, + "learning_rate": 2.9333941605839417e-05, + "loss": 0.1214, + "step": 16500 + }, + { + "epoch": 21.6, + "eval_loss": 0.5917083024978638, + "eval_runtime": 67.5557, + "eval_samples_per_second": 15.158, + "eval_steps_per_second": 1.895, + "eval_wer": 0.4005614823133071, + "step": 16500 + }, + { + "epoch": 22.25, + "learning_rate": 2.70529197080292e-05, + "loss": 0.1184, + "step": 17000 + }, + { + "epoch": 22.25, + "eval_loss": 0.578889787197113, + "eval_runtime": 66.5407, + "eval_samples_per_second": 15.389, + "eval_steps_per_second": 1.924, + "eval_wer": 0.39460976979225154, + "step": 17000 + }, + { + "epoch": 22.91, + "learning_rate": 2.477189781021898e-05, + "loss": 0.1132, + "step": 17500 + }, + { + "epoch": 22.91, + "eval_loss": 0.5992549657821655, + "eval_runtime": 66.4759, + "eval_samples_per_second": 15.404, + "eval_steps_per_second": 1.926, + "eval_wer": 0.3892195395845031, + "step": 17500 + }, + { + "epoch": 23.56, + "learning_rate": 2.249087591240876e-05, + "loss": 0.1052, + "step": 18000 + }, + { + "epoch": 23.56, + "eval_loss": 0.5946719646453857, + "eval_runtime": 66.8036, + "eval_samples_per_second": 15.329, + "eval_steps_per_second": 1.916, + "eval_wer": 0.3889949466591802, + "step": 18000 + }, + { + "epoch": 24.21, + "learning_rate": 2.0209854014598544e-05, + "loss": 0.113, + "step": 18500 + }, + { + "epoch": 24.21, + "eval_loss": 0.6009370684623718, + "eval_runtime": 65.8523, + "eval_samples_per_second": 15.55, + "eval_steps_per_second": 1.944, + "eval_wer": 0.3841661987647389, + "step": 18500 + }, + { + "epoch": 24.87, + "learning_rate": 1.792883211678832e-05, + "loss": 0.1021, + "step": 19000 + }, + { + "epoch": 24.87, + "eval_loss": 0.6255775690078735, + "eval_runtime": 67.7689, + "eval_samples_per_second": 15.11, + "eval_steps_per_second": 1.889, + "eval_wer": 0.3841661987647389, + "step": 19000 + }, + { + "epoch": 25.52, + "learning_rate": 1.5652372262773723e-05, + "loss": 0.0966, + "step": 19500 + }, + { + "epoch": 25.52, + "eval_loss": 0.6213188171386719, + "eval_runtime": 68.5057, + "eval_samples_per_second": 14.948, + "eval_steps_per_second": 1.868, + "eval_wer": 0.3869736103312746, + "step": 19500 + }, + { + "epoch": 26.18, + "learning_rate": 1.3371350364963504e-05, + "loss": 0.0981, + "step": 20000 + }, + { + "epoch": 26.18, + "eval_loss": 0.6331676244735718, + "eval_runtime": 67.2276, + "eval_samples_per_second": 15.232, + "eval_steps_per_second": 1.904, + "eval_wer": 0.37866367209432905, + "step": 20000 + }, + { + "epoch": 26.83, + "learning_rate": 1.1090328467153285e-05, + "loss": 0.0983, + "step": 20500 + }, + { + "epoch": 26.83, + "eval_loss": 0.6225576996803284, + "eval_runtime": 66.5907, + "eval_samples_per_second": 15.378, + "eval_steps_per_second": 1.922, + "eval_wer": 0.3806850084222347, + "step": 20500 + }, + { + "epoch": 27.49, + "learning_rate": 8.809306569343067e-06, + "loss": 0.0915, + "step": 21000 + }, + { + "epoch": 27.49, + "eval_loss": 0.6176589727401733, + "eval_runtime": 66.1318, + "eval_samples_per_second": 15.484, + "eval_steps_per_second": 1.936, + "eval_wer": 0.3785513756316676, + "step": 21000 + }, + { + "epoch": 28.14, + "learning_rate": 6.532846715328468e-06, + "loss": 0.0898, + "step": 21500 + }, + { + "epoch": 28.14, + "eval_loss": 0.6252412796020508, + "eval_runtime": 66.2396, + "eval_samples_per_second": 15.459, + "eval_steps_per_second": 1.932, + "eval_wer": 0.3828186412128018, + "step": 21500 + }, + { + "epoch": 28.8, + "learning_rate": 4.251824817518248e-06, + "loss": 0.0916, + "step": 22000 + }, + { + "epoch": 28.8, + "eval_loss": 0.6163517832756042, + "eval_runtime": 66.1166, + "eval_samples_per_second": 15.488, + "eval_steps_per_second": 1.936, + "eval_wer": 0.3760808534531162, + "step": 22000 + }, + { + "epoch": 29.45, + "learning_rate": 1.9708029197080293e-06, + "loss": 0.0892, + "step": 22500 + }, + { + "epoch": 29.45, + "eval_loss": 0.6187598705291748, + "eval_runtime": 66.847, + "eval_samples_per_second": 15.319, + "eval_steps_per_second": 1.915, + "eval_wer": 0.37113980909601346, + "step": 22500 + } + ], + "max_steps": 22920, + "num_train_epochs": 30, + "total_flos": 7.146961381638424e+18, + "trial_name": null, + "trial_params": null +} diff --git a/wav2vec2_esp_15h/checkpoint-22500/training_args.bin b/wav2vec2_esp_15h/checkpoint-22500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2e6daefc0b769b8a00a52a5abec8cceafabb368 Binary files /dev/null and b/wav2vec2_esp_15h/checkpoint-22500/training_args.bin differ diff --git a/wav2vec2_esp_15h/runs/Mar08_04-04-36_mint/1678216062.1216896/events.out.tfevents.1678216062.mint b/wav2vec2_esp_15h/runs/Mar08_04-04-36_mint/1678216062.1216896/events.out.tfevents.1678216062.mint new file mode 100644 index 0000000000000000000000000000000000000000..8e4a8945a2e3c7c78ca16c8754b29bba74149d9d Binary files /dev/null and b/wav2vec2_esp_15h/runs/Mar08_04-04-36_mint/1678216062.1216896/events.out.tfevents.1678216062.mint differ diff --git a/wav2vec2_esp_15h/runs/Mar08_04-04-36_mint/events.out.tfevents.1678216062.mint b/wav2vec2_esp_15h/runs/Mar08_04-04-36_mint/events.out.tfevents.1678216062.mint new file mode 100644 index 0000000000000000000000000000000000000000..6cff2c56c8447513f39cfeaa9fe8f6f970c9f0e9 Binary files /dev/null and b/wav2vec2_esp_15h/runs/Mar08_04-04-36_mint/events.out.tfevents.1678216062.mint differ diff --git a/wav2vec2_esp_30h/checkpoint-43500/config.json b/wav2vec2_esp_30h/checkpoint-43500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..647caeba97229b51ef3269ab43fbe635ecb4e3be --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-43500/config.json @@ -0,0 +1,117 @@ +{ + "_name_or_path": "facebook/wav2vec2-base", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "model_type": "wav2vec2", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 37, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.17.0", + "use_weighted_layer_sum": false, + "vocab_size": 38, + "xvector_output_dim": 512 +} diff --git a/wav2vec2_esp_30h/checkpoint-43500/optimizer.pt b/wav2vec2_esp_30h/checkpoint-43500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..71c054b7d0a2c07fda640f2ec7caf495123af970 --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-43500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c364ebd0559d3e71a017c630c7a911334c73e55398298942b26f7225f946ca95 +size 721723525 diff --git a/wav2vec2_esp_30h/checkpoint-43500/preprocessor_config.json b/wav2vec2_esp_30h/checkpoint-43500/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73caa151574001d3d495fae897e1d38968249712 --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-43500/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wav2vec2_esp_30h/checkpoint-43500/pytorch_model.bin b/wav2vec2_esp_30h/checkpoint-43500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..57a2ce8436c071e5d995603f3168cd29fb007d30 --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-43500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6cb1a25d6fbe0dd18ef64cd5a9e527e3675149fb972e3b51c522be7a4e0c969 +size 377691873 diff --git a/wav2vec2_esp_30h/checkpoint-43500/rng_state.pth b/wav2vec2_esp_30h/checkpoint-43500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4bfed9e1eda9dd12c53423e2a787deae7afaf946 Binary files /dev/null and b/wav2vec2_esp_30h/checkpoint-43500/rng_state.pth differ diff --git a/wav2vec2_esp_30h/checkpoint-43500/scaler.pt b/wav2vec2_esp_30h/checkpoint-43500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f87166e9ea54cbffd0418e2fc6cc02afd295504a Binary files /dev/null and b/wav2vec2_esp_30h/checkpoint-43500/scaler.pt differ diff --git a/wav2vec2_esp_30h/checkpoint-43500/scheduler.pt b/wav2vec2_esp_30h/checkpoint-43500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36b64c22c9ef6a320e07ef2109208bb8d0a29e47 Binary files /dev/null and b/wav2vec2_esp_30h/checkpoint-43500/scheduler.pt differ diff --git a/wav2vec2_esp_30h/checkpoint-43500/trainer_state.json b/wav2vec2_esp_30h/checkpoint-43500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..04cb207334a5a9cef4c632cd22f547209f8d1993 --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-43500/trainer_state.json @@ -0,0 +1,1321 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.61198093941457, + "global_step": 43500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.34, + "learning_rate": 4.9500000000000004e-05, + "loss": 6.2203, + "step": 500 + }, + { + "epoch": 0.34, + "eval_loss": 3.002925157546997, + "eval_runtime": 132.7789, + "eval_samples_per_second": 14.942, + "eval_steps_per_second": 1.868, + "eval_wer": 1.0, + "step": 500 + }, + { + "epoch": 0.68, + "learning_rate": 9.95e-05, + "loss": 2.2102, + "step": 1000 + }, + { + "epoch": 0.68, + "eval_loss": 1.1446926593780518, + "eval_runtime": 132.5997, + "eval_samples_per_second": 14.962, + "eval_steps_per_second": 1.87, + "eval_wer": 0.8542348032028871, + "step": 1000 + }, + { + "epoch": 1.02, + "learning_rate": 9.885070814952404e-05, + "loss": 0.9714, + "step": 1500 + }, + { + "epoch": 1.02, + "eval_loss": 0.7160163521766663, + "eval_runtime": 132.7305, + "eval_samples_per_second": 14.948, + "eval_steps_per_second": 1.868, + "eval_wer": 0.6600879666177963, + "step": 1500 + }, + { + "epoch": 1.36, + "learning_rate": 9.76898072904574e-05, + "loss": 0.7541, + "step": 2000 + }, + { + "epoch": 1.36, + "eval_loss": 0.6261711120605469, + "eval_runtime": 132.7267, + "eval_samples_per_second": 14.948, + "eval_steps_per_second": 1.869, + "eval_wer": 0.594733280703733, + "step": 2000 + }, + { + "epoch": 1.7, + "learning_rate": 9.652890643139077e-05, + "loss": 0.6838, + "step": 2500 + }, + { + "epoch": 1.7, + "eval_loss": 0.5789812207221985, + "eval_runtime": 131.2782, + "eval_samples_per_second": 15.113, + "eval_steps_per_second": 1.889, + "eval_wer": 0.5645088530506372, + "step": 2500 + }, + { + "epoch": 2.04, + "learning_rate": 9.537032737404225e-05, + "loss": 0.6287, + "step": 3000 + }, + { + "epoch": 2.04, + "eval_loss": 0.6015335917472839, + "eval_runtime": 133.004, + "eval_samples_per_second": 14.917, + "eval_steps_per_second": 1.865, + "eval_wer": 0.5387955340024811, + "step": 3000 + }, + { + "epoch": 2.38, + "learning_rate": 9.420942651497563e-05, + "loss": 0.5439, + "step": 3500 + }, + { + "epoch": 2.38, + "eval_loss": 0.5301734209060669, + "eval_runtime": 133.1151, + "eval_samples_per_second": 14.904, + "eval_steps_per_second": 1.863, + "eval_wer": 0.5076124957708357, + "step": 3500 + }, + { + "epoch": 2.72, + "learning_rate": 9.304852565590899e-05, + "loss": 0.5279, + "step": 4000 + }, + { + "epoch": 2.72, + "eval_loss": 0.5215476155281067, + "eval_runtime": 131.1447, + "eval_samples_per_second": 15.128, + "eval_steps_per_second": 1.891, + "eval_wer": 0.48962445020863876, + "step": 4000 + }, + { + "epoch": 3.06, + "learning_rate": 9.188762479684235e-05, + "loss": 0.5006, + "step": 4500 + }, + { + "epoch": 3.06, + "eval_loss": 0.4860161542892456, + "eval_runtime": 133.2608, + "eval_samples_per_second": 14.888, + "eval_steps_per_second": 1.861, + "eval_wer": 0.4685350174805458, + "step": 4500 + }, + { + "epoch": 3.4, + "learning_rate": 9.072672393777572e-05, + "loss": 0.4432, + "step": 5000 + }, + { + "epoch": 3.4, + "eval_loss": 0.484553724527359, + "eval_runtime": 133.4845, + "eval_samples_per_second": 14.863, + "eval_steps_per_second": 1.858, + "eval_wer": 0.46864779519566935, + "step": 5000 + }, + { + "epoch": 3.74, + "learning_rate": 8.956582307870908e-05, + "loss": 0.4334, + "step": 5500 + }, + { + "epoch": 3.74, + "eval_loss": 0.477468878030777, + "eval_runtime": 132.8856, + "eval_samples_per_second": 14.93, + "eval_steps_per_second": 1.866, + "eval_wer": 0.45415585880230064, + "step": 5500 + }, + { + "epoch": 4.08, + "learning_rate": 8.840492221964245e-05, + "loss": 0.4292, + "step": 6000 + }, + { + "epoch": 4.08, + "eval_loss": 0.451526939868927, + "eval_runtime": 132.3623, + "eval_samples_per_second": 14.989, + "eval_steps_per_second": 1.874, + "eval_wer": 0.4291192060448855, + "step": 6000 + }, + { + "epoch": 4.42, + "learning_rate": 8.724402136057582e-05, + "loss": 0.3779, + "step": 6500 + }, + { + "epoch": 4.42, + "eval_loss": 0.4495590329170227, + "eval_runtime": 132.6275, + "eval_samples_per_second": 14.959, + "eval_steps_per_second": 1.87, + "eval_wer": 0.42054809969550017, + "step": 6500 + }, + { + "epoch": 4.77, + "learning_rate": 8.608312050150917e-05, + "loss": 0.3783, + "step": 7000 + }, + { + "epoch": 4.77, + "eval_loss": 0.45758938789367676, + "eval_runtime": 132.4666, + "eval_samples_per_second": 14.977, + "eval_steps_per_second": 1.872, + "eval_wer": 0.4184053231081538, + "step": 7000 + }, + { + "epoch": 5.11, + "learning_rate": 8.492221964244254e-05, + "loss": 0.3622, + "step": 7500 + }, + { + "epoch": 5.11, + "eval_loss": 0.4782721698284149, + "eval_runtime": 133.2094, + "eval_samples_per_second": 14.894, + "eval_steps_per_second": 1.862, + "eval_wer": 0.4070147738806812, + "step": 7500 + }, + { + "epoch": 5.45, + "learning_rate": 8.37613187833759e-05, + "loss": 0.3278, + "step": 8000 + }, + { + "epoch": 5.45, + "eval_loss": 0.44267573952674866, + "eval_runtime": 133.2221, + "eval_samples_per_second": 14.892, + "eval_steps_per_second": 1.862, + "eval_wer": 0.40278560956355025, + "step": 8000 + }, + { + "epoch": 5.79, + "learning_rate": 8.260041792430927e-05, + "loss": 0.3304, + "step": 8500 + }, + { + "epoch": 5.79, + "eval_loss": 0.4482724368572235, + "eval_runtime": 132.3395, + "eval_samples_per_second": 14.992, + "eval_steps_per_second": 1.874, + "eval_wer": 0.4056050524416375, + "step": 8500 + }, + { + "epoch": 6.13, + "learning_rate": 8.143951706524264e-05, + "loss": 0.312, + "step": 9000 + }, + { + "epoch": 6.13, + "eval_loss": 0.4750816524028778, + "eval_runtime": 132.887, + "eval_samples_per_second": 14.93, + "eval_steps_per_second": 1.866, + "eval_wer": 0.3882936731701816, + "step": 9000 + }, + { + "epoch": 6.47, + "learning_rate": 8.027861620617599e-05, + "loss": 0.29, + "step": 9500 + }, + { + "epoch": 6.47, + "eval_loss": 0.4528858959674835, + "eval_runtime": 133.211, + "eval_samples_per_second": 14.894, + "eval_steps_per_second": 1.862, + "eval_wer": 0.3780872899515056, + "step": 9500 + }, + { + "epoch": 6.81, + "learning_rate": 7.912235895054563e-05, + "loss": 0.3057, + "step": 10000 + }, + { + "epoch": 6.81, + "eval_loss": 0.5861864686012268, + "eval_runtime": 133.2534, + "eval_samples_per_second": 14.889, + "eval_steps_per_second": 1.861, + "eval_wer": 0.3783128453817526, + "step": 10000 + }, + { + "epoch": 7.15, + "learning_rate": 7.796145809147898e-05, + "loss": 0.2971, + "step": 10500 + }, + { + "epoch": 7.15, + "eval_loss": 0.43457281589508057, + "eval_runtime": 132.5985, + "eval_samples_per_second": 14.962, + "eval_steps_per_second": 1.87, + "eval_wer": 0.3765084019397767, + "step": 10500 + }, + { + "epoch": 7.49, + "learning_rate": 7.680055723241235e-05, + "loss": 0.2684, + "step": 11000 + }, + { + "epoch": 7.49, + "eval_loss": 0.4560734033584595, + "eval_runtime": 132.2037, + "eval_samples_per_second": 15.007, + "eval_steps_per_second": 1.876, + "eval_wer": 0.3732942370587572, + "step": 11000 + }, + { + "epoch": 7.83, + "learning_rate": 7.563965637334573e-05, + "loss": 0.2622, + "step": 11500 + }, + { + "epoch": 7.83, + "eval_loss": 0.43240657448768616, + "eval_runtime": 133.3517, + "eval_samples_per_second": 14.878, + "eval_steps_per_second": 1.86, + "eval_wer": 0.37408368106462164, + "step": 11500 + }, + { + "epoch": 8.17, + "learning_rate": 7.447875551427908e-05, + "loss": 0.2635, + "step": 12000 + }, + { + "epoch": 8.17, + "eval_loss": 0.4556463658809662, + "eval_runtime": 133.4279, + "eval_samples_per_second": 14.869, + "eval_steps_per_second": 1.859, + "eval_wer": 0.3789895116724935, + "step": 12000 + }, + { + "epoch": 8.51, + "learning_rate": 7.331785465521245e-05, + "loss": 0.2363, + "step": 12500 + }, + { + "epoch": 8.51, + "eval_loss": 0.47497859597206116, + "eval_runtime": 134.0404, + "eval_samples_per_second": 14.802, + "eval_steps_per_second": 1.85, + "eval_wer": 0.3709822938987256, + "step": 12500 + }, + { + "epoch": 8.85, + "learning_rate": 7.215695379614582e-05, + "loss": 0.2516, + "step": 13000 + }, + { + "epoch": 8.85, + "eval_loss": 0.4297301471233368, + "eval_runtime": 133.4288, + "eval_samples_per_second": 14.869, + "eval_steps_per_second": 1.859, + "eval_wer": 0.3635389647005752, + "step": 13000 + }, + { + "epoch": 9.19, + "learning_rate": 7.099605293707917e-05, + "loss": 0.2291, + "step": 13500 + }, + { + "epoch": 9.19, + "eval_loss": 0.4862041175365448, + "eval_runtime": 133.7259, + "eval_samples_per_second": 14.836, + "eval_steps_per_second": 1.855, + "eval_wer": 0.36021202210443215, + "step": 13500 + }, + { + "epoch": 9.53, + "learning_rate": 6.983515207801255e-05, + "loss": 0.2266, + "step": 14000 + }, + { + "epoch": 9.53, + "eval_loss": 0.4703396260738373, + "eval_runtime": 133.0987, + "eval_samples_per_second": 14.906, + "eval_steps_per_second": 1.863, + "eval_wer": 0.3546295252058193, + "step": 14000 + }, + { + "epoch": 9.87, + "learning_rate": 6.867657302066403e-05, + "loss": 0.2281, + "step": 14500 + }, + { + "epoch": 9.87, + "eval_loss": 0.42882266640663147, + "eval_runtime": 133.4534, + "eval_samples_per_second": 14.867, + "eval_steps_per_second": 1.858, + "eval_wer": 0.35812563437464756, + "step": 14500 + }, + { + "epoch": 10.21, + "learning_rate": 6.75156721615974e-05, + "loss": 0.2126, + "step": 15000 + }, + { + "epoch": 10.21, + "eval_loss": 0.4615735113620758, + "eval_runtime": 132.7985, + "eval_samples_per_second": 14.94, + "eval_steps_per_second": 1.867, + "eval_wer": 0.35626480207510997, + "step": 15000 + }, + { + "epoch": 10.55, + "learning_rate": 6.635477130253078e-05, + "loss": 0.217, + "step": 15500 + }, + { + "epoch": 10.55, + "eval_loss": 0.49455586075782776, + "eval_runtime": 133.6528, + "eval_samples_per_second": 14.844, + "eval_steps_per_second": 1.856, + "eval_wer": 0.34560730799594, + "step": 15500 + }, + { + "epoch": 10.89, + "learning_rate": 6.519387044346413e-05, + "loss": 0.212, + "step": 16000 + }, + { + "epoch": 10.89, + "eval_loss": 0.43323996663093567, + "eval_runtime": 133.4419, + "eval_samples_per_second": 14.868, + "eval_steps_per_second": 1.858, + "eval_wer": 0.3452689748505695, + "step": 16000 + }, + { + "epoch": 11.23, + "learning_rate": 6.40329695843975e-05, + "loss": 0.1986, + "step": 16500 + }, + { + "epoch": 11.23, + "eval_loss": 0.4868086576461792, + "eval_runtime": 131.4603, + "eval_samples_per_second": 15.092, + "eval_steps_per_second": 1.887, + "eval_wer": 0.3399684222397654, + "step": 16500 + }, + { + "epoch": 11.57, + "learning_rate": 6.2874390527049e-05, + "loss": 0.2012, + "step": 17000 + }, + { + "epoch": 11.57, + "eval_loss": 0.4474054276943207, + "eval_runtime": 133.9018, + "eval_samples_per_second": 14.817, + "eval_steps_per_second": 1.852, + "eval_wer": 0.3460020299988722, + "step": 17000 + }, + { + "epoch": 11.91, + "learning_rate": 6.171348966798236e-05, + "loss": 0.1998, + "step": 17500 + }, + { + "epoch": 11.91, + "eval_loss": 0.45011407136917114, + "eval_runtime": 132.7933, + "eval_samples_per_second": 14.941, + "eval_steps_per_second": 1.868, + "eval_wer": 0.3362467576406902, + "step": 17500 + }, + { + "epoch": 12.25, + "learning_rate": 6.055258880891572e-05, + "loss": 0.1746, + "step": 18000 + }, + { + "epoch": 12.25, + "eval_loss": 0.4731091558933258, + "eval_runtime": 133.9219, + "eval_samples_per_second": 14.815, + "eval_steps_per_second": 1.852, + "eval_wer": 0.33630314649825194, + "step": 18000 + }, + { + "epoch": 12.59, + "learning_rate": 5.9391687949849086e-05, + "loss": 0.1805, + "step": 18500 + }, + { + "epoch": 12.59, + "eval_loss": 0.4601946771144867, + "eval_runtime": 132.0532, + "eval_samples_per_second": 15.024, + "eval_steps_per_second": 1.878, + "eval_wer": 0.33743092364948685, + "step": 18500 + }, + { + "epoch": 12.93, + "learning_rate": 5.823310889250059e-05, + "loss": 0.1826, + "step": 19000 + }, + { + "epoch": 12.93, + "eval_loss": 0.47728002071380615, + "eval_runtime": 133.9362, + "eval_samples_per_second": 14.813, + "eval_steps_per_second": 1.852, + "eval_wer": 0.3276192624337431, + "step": 19000 + }, + { + "epoch": 13.27, + "learning_rate": 5.707220803343395e-05, + "loss": 0.1651, + "step": 19500 + }, + { + "epoch": 13.27, + "eval_loss": 0.471328467130661, + "eval_runtime": 111.9462, + "eval_samples_per_second": 17.723, + "eval_steps_per_second": 2.215, + "eval_wer": 0.3304387053118304, + "step": 19500 + }, + { + "epoch": 13.61, + "learning_rate": 5.591130717436731e-05, + "loss": 0.1812, + "step": 20000 + }, + { + "epoch": 13.61, + "eval_loss": 0.4280690550804138, + "eval_runtime": 110.8492, + "eval_samples_per_second": 17.898, + "eval_steps_per_second": 2.237, + "eval_wer": 0.32276982068343296, + "step": 20000 + }, + { + "epoch": 13.96, + "learning_rate": 5.4750406315300676e-05, + "loss": 0.1666, + "step": 20500 + }, + { + "epoch": 13.96, + "eval_loss": 0.4796316623687744, + "eval_runtime": 110.5982, + "eval_samples_per_second": 17.939, + "eval_steps_per_second": 2.242, + "eval_wer": 0.32530731927371154, + "step": 20500 + }, + { + "epoch": 14.3, + "learning_rate": 5.359182725795218e-05, + "loss": 0.1553, + "step": 21000 + }, + { + "epoch": 14.3, + "eval_loss": 0.4721369445323944, + "eval_runtime": 128.2013, + "eval_samples_per_second": 15.476, + "eval_steps_per_second": 1.934, + "eval_wer": 0.3259275967068907, + "step": 21000 + }, + { + "epoch": 14.64, + "learning_rate": 5.243092639888554e-05, + "loss": 0.1545, + "step": 21500 + }, + { + "epoch": 14.64, + "eval_loss": 0.4527774453163147, + "eval_runtime": 132.8486, + "eval_samples_per_second": 14.934, + "eval_steps_per_second": 1.867, + "eval_wer": 0.32682981842787867, + "step": 21500 + }, + { + "epoch": 14.98, + "learning_rate": 5.12700255398189e-05, + "loss": 0.1576, + "step": 22000 + }, + { + "epoch": 14.98, + "eval_loss": 0.4553755819797516, + "eval_runtime": 131.6613, + "eval_samples_per_second": 15.069, + "eval_steps_per_second": 1.884, + "eval_wer": 0.3252509304161498, + "step": 22000 + }, + { + "epoch": 15.32, + "learning_rate": 5.0109124680752265e-05, + "loss": 0.1511, + "step": 22500 + }, + { + "epoch": 15.32, + "eval_loss": 0.4580444097518921, + "eval_runtime": 134.2974, + "eval_samples_per_second": 14.773, + "eval_steps_per_second": 1.847, + "eval_wer": 0.3179203789331228, + "step": 22500 + }, + { + "epoch": 15.66, + "learning_rate": 4.895054562340376e-05, + "loss": 0.1444, + "step": 23000 + }, + { + "epoch": 15.66, + "eval_loss": 0.4659278094768524, + "eval_runtime": 134.6637, + "eval_samples_per_second": 14.733, + "eval_steps_per_second": 1.842, + "eval_wer": 0.32119093267170407, + "step": 23000 + }, + { + "epoch": 16.0, + "learning_rate": 4.7791966566055266e-05, + "loss": 0.1496, + "step": 23500 + }, + { + "epoch": 16.0, + "eval_loss": 0.4660351872444153, + "eval_runtime": 133.4382, + "eval_samples_per_second": 14.868, + "eval_steps_per_second": 1.859, + "eval_wer": 0.32598398556445246, + "step": 23500 + }, + { + "epoch": 16.34, + "learning_rate": 4.6631065706988625e-05, + "loss": 0.1327, + "step": 24000 + }, + { + "epoch": 16.34, + "eval_loss": 0.4934828281402588, + "eval_runtime": 134.3743, + "eval_samples_per_second": 14.765, + "eval_steps_per_second": 1.846, + "eval_wer": 0.31825871207849327, + "step": 24000 + }, + { + "epoch": 16.68, + "learning_rate": 4.547016484792199e-05, + "loss": 0.1535, + "step": 24500 + }, + { + "epoch": 16.68, + "eval_loss": 0.46466243267059326, + "eval_runtime": 132.9271, + "eval_samples_per_second": 14.925, + "eval_steps_per_second": 1.866, + "eval_wer": 0.3113792714559603, + "step": 24500 + }, + { + "epoch": 17.02, + "learning_rate": 4.430926398885535e-05, + "loss": 0.1438, + "step": 25000 + }, + { + "epoch": 17.02, + "eval_loss": 0.5053276419639587, + "eval_runtime": 132.7443, + "eval_samples_per_second": 14.946, + "eval_steps_per_second": 1.868, + "eval_wer": 0.31487538062478854, + "step": 25000 + }, + { + "epoch": 17.36, + "learning_rate": 4.314836312978872e-05, + "loss": 0.1264, + "step": 25500 + }, + { + "epoch": 17.36, + "eval_loss": 0.48962870240211487, + "eval_runtime": 133.9594, + "eval_samples_per_second": 14.81, + "eval_steps_per_second": 1.851, + "eval_wer": 0.3131273260403744, + "step": 25500 + }, + { + "epoch": 17.7, + "learning_rate": 4.1987462270722084e-05, + "loss": 0.1269, + "step": 26000 + }, + { + "epoch": 17.7, + "eval_loss": 0.46815410256385803, + "eval_runtime": 132.7946, + "eval_samples_per_second": 14.94, + "eval_steps_per_second": 1.868, + "eval_wer": 0.30748844028419986, + "step": 26000 + }, + { + "epoch": 18.04, + "learning_rate": 4.0826561411655444e-05, + "loss": 0.1272, + "step": 26500 + }, + { + "epoch": 18.04, + "eval_loss": 0.4759911596775055, + "eval_runtime": 132.8376, + "eval_samples_per_second": 14.936, + "eval_steps_per_second": 1.867, + "eval_wer": 0.3113792714559603, + "step": 26500 + }, + { + "epoch": 18.38, + "learning_rate": 3.966566055258881e-05, + "loss": 0.1219, + "step": 27000 + }, + { + "epoch": 18.38, + "eval_loss": 0.49612942337989807, + "eval_runtime": 133.1179, + "eval_samples_per_second": 14.904, + "eval_steps_per_second": 1.863, + "eval_wer": 0.31487538062478854, + "step": 27000 + }, + { + "epoch": 18.72, + "learning_rate": 3.850475969352218e-05, + "loss": 0.123, + "step": 27500 + }, + { + "epoch": 18.72, + "eval_loss": 0.46862220764160156, + "eval_runtime": 110.6681, + "eval_samples_per_second": 17.927, + "eval_steps_per_second": 2.241, + "eval_wer": 0.3072064959963911, + "step": 27500 + }, + { + "epoch": 19.06, + "learning_rate": 3.734385883445554e-05, + "loss": 0.1262, + "step": 28000 + }, + { + "epoch": 19.06, + "eval_loss": 0.49365413188934326, + "eval_runtime": 109.7879, + "eval_samples_per_second": 18.071, + "eval_steps_per_second": 2.259, + "eval_wer": 0.30889816172324347, + "step": 28000 + }, + { + "epoch": 19.4, + "learning_rate": 3.61829579753889e-05, + "loss": 0.1165, + "step": 28500 + }, + { + "epoch": 19.4, + "eval_loss": 0.4988892376422882, + "eval_runtime": 110.8122, + "eval_samples_per_second": 17.904, + "eval_steps_per_second": 2.238, + "eval_wer": 0.30540205255441527, + "step": 28500 + }, + { + "epoch": 19.74, + "learning_rate": 3.50243789180404e-05, + "loss": 0.1213, + "step": 29000 + }, + { + "epoch": 19.74, + "eval_loss": 0.4937494993209839, + "eval_runtime": 109.7673, + "eval_samples_per_second": 18.075, + "eval_steps_per_second": 2.259, + "eval_wer": 0.30404871997293337, + "step": 29000 + }, + { + "epoch": 20.08, + "learning_rate": 3.3863478058973766e-05, + "loss": 0.1113, + "step": 29500 + }, + { + "epoch": 20.08, + "eval_loss": 0.5031572580337524, + "eval_runtime": 111.1946, + "eval_samples_per_second": 17.843, + "eval_steps_per_second": 2.23, + "eval_wer": 0.3037667756851246, + "step": 29500 + }, + { + "epoch": 20.42, + "learning_rate": 3.270257719990713e-05, + "loss": 0.1087, + "step": 30000 + }, + { + "epoch": 20.42, + "eval_loss": 0.49494612216949463, + "eval_runtime": 126.8851, + "eval_samples_per_second": 15.636, + "eval_steps_per_second": 1.955, + "eval_wer": 0.299030111649938, + "step": 30000 + }, + { + "epoch": 20.76, + "learning_rate": 3.154167634084049e-05, + "loss": 0.1128, + "step": 30500 + }, + { + "epoch": 20.76, + "eval_loss": 0.49934807419776917, + "eval_runtime": 128.8495, + "eval_samples_per_second": 15.398, + "eval_steps_per_second": 1.925, + "eval_wer": 0.3017367768129018, + "step": 30500 + }, + { + "epoch": 21.1, + "learning_rate": 3.038077548177386e-05, + "loss": 0.1151, + "step": 31000 + }, + { + "epoch": 21.1, + "eval_loss": 0.5088058114051819, + "eval_runtime": 128.5026, + "eval_samples_per_second": 15.439, + "eval_steps_per_second": 1.93, + "eval_wer": 0.30184955452802525, + "step": 31000 + }, + { + "epoch": 21.44, + "learning_rate": 2.922219642442536e-05, + "loss": 0.1025, + "step": 31500 + }, + { + "epoch": 21.44, + "eval_loss": 0.47611942887306213, + "eval_runtime": 111.1648, + "eval_samples_per_second": 17.847, + "eval_steps_per_second": 2.231, + "eval_wer": 0.29891733393481446, + "step": 31500 + }, + { + "epoch": 21.78, + "learning_rate": 2.8063617367076856e-05, + "loss": 0.1016, + "step": 32000 + }, + { + "epoch": 21.78, + "eval_loss": 0.5126113891601562, + "eval_runtime": 121.5373, + "eval_samples_per_second": 16.324, + "eval_steps_per_second": 2.041, + "eval_wer": 0.30275177624901317, + "step": 32000 + }, + { + "epoch": 22.12, + "learning_rate": 2.690271650801022e-05, + "loss": 0.0991, + "step": 32500 + }, + { + "epoch": 22.12, + "eval_loss": 0.5214097499847412, + "eval_runtime": 129.4856, + "eval_samples_per_second": 15.322, + "eval_steps_per_second": 1.915, + "eval_wer": 0.2958159467689185, + "step": 32500 + }, + { + "epoch": 22.46, + "learning_rate": 2.574181564894358e-05, + "loss": 0.0951, + "step": 33000 + }, + { + "epoch": 22.46, + "eval_loss": 0.5233449339866638, + "eval_runtime": 130.9955, + "eval_samples_per_second": 15.146, + "eval_steps_per_second": 1.893, + "eval_wer": 0.2923198376000902, + "step": 33000 + }, + { + "epoch": 22.8, + "learning_rate": 2.4580914789876948e-05, + "loss": 0.1049, + "step": 33500 + }, + { + "epoch": 22.8, + "eval_loss": 0.48444411158561707, + "eval_runtime": 110.8951, + "eval_samples_per_second": 17.891, + "eval_steps_per_second": 2.236, + "eval_wer": 0.29677455734746816, + "step": 33500 + }, + { + "epoch": 23.14, + "learning_rate": 2.342001393081031e-05, + "loss": 0.1014, + "step": 34000 + }, + { + "epoch": 23.14, + "eval_loss": 0.4997089207172394, + "eval_runtime": 111.4563, + "eval_samples_per_second": 17.801, + "eval_steps_per_second": 2.225, + "eval_wer": 0.292376226457652, + "step": 34000 + }, + { + "epoch": 23.49, + "learning_rate": 2.2261434873461805e-05, + "loss": 0.0959, + "step": 34500 + }, + { + "epoch": 23.49, + "eval_loss": 0.48040756583213806, + "eval_runtime": 129.4236, + "eval_samples_per_second": 15.33, + "eval_steps_per_second": 1.916, + "eval_wer": 0.28978233900981165, + "step": 34500 + }, + { + "epoch": 23.83, + "learning_rate": 2.110053401439517e-05, + "loss": 0.098, + "step": 35000 + }, + { + "epoch": 23.83, + "eval_loss": 0.501395046710968, + "eval_runtime": 129.507, + "eval_samples_per_second": 15.32, + "eval_steps_per_second": 1.915, + "eval_wer": 0.291699560166911, + "step": 35000 + }, + { + "epoch": 24.17, + "learning_rate": 1.9939633155328534e-05, + "loss": 0.0973, + "step": 35500 + }, + { + "epoch": 24.17, + "eval_loss": 0.5193932056427002, + "eval_runtime": 129.3856, + "eval_samples_per_second": 15.334, + "eval_steps_per_second": 1.917, + "eval_wer": 0.28955678357956466, + "step": 35500 + }, + { + "epoch": 24.51, + "learning_rate": 1.87787322962619e-05, + "loss": 0.0876, + "step": 36000 + }, + { + "epoch": 24.51, + "eval_loss": 0.5203258395195007, + "eval_runtime": 125.0985, + "eval_samples_per_second": 15.859, + "eval_steps_per_second": 1.982, + "eval_wer": 0.28718845156197137, + "step": 36000 + }, + { + "epoch": 24.85, + "learning_rate": 1.7617831437195264e-05, + "loss": 0.0902, + "step": 36500 + }, + { + "epoch": 24.85, + "eval_loss": 0.536376953125, + "eval_runtime": 113.525, + "eval_samples_per_second": 17.476, + "eval_steps_per_second": 2.185, + "eval_wer": 0.29136122702154055, + "step": 36500 + }, + { + "epoch": 25.19, + "learning_rate": 1.645693057812863e-05, + "loss": 0.0889, + "step": 37000 + }, + { + "epoch": 25.19, + "eval_loss": 0.531440019607544, + "eval_runtime": 123.4292, + "eval_samples_per_second": 16.074, + "eval_steps_per_second": 2.009, + "eval_wer": 0.2880906732829593, + "step": 37000 + }, + { + "epoch": 25.53, + "learning_rate": 1.5296029719061993e-05, + "loss": 0.0865, + "step": 37500 + }, + { + "epoch": 25.53, + "eval_loss": 0.5107194185256958, + "eval_runtime": 129.5881, + "eval_samples_per_second": 15.31, + "eval_steps_per_second": 1.914, + "eval_wer": 0.285271230404872, + "step": 37500 + }, + { + "epoch": 25.87, + "learning_rate": 1.4135128859995356e-05, + "loss": 0.0859, + "step": 38000 + }, + { + "epoch": 25.87, + "eval_loss": 0.5254319310188293, + "eval_runtime": 128.9308, + "eval_samples_per_second": 15.388, + "eval_steps_per_second": 1.924, + "eval_wer": 0.28780872899515053, + "step": 38000 + }, + { + "epoch": 26.21, + "learning_rate": 1.2974228000928721e-05, + "loss": 0.0813, + "step": 38500 + }, + { + "epoch": 26.21, + "eval_loss": 0.5275471806526184, + "eval_runtime": 110.8092, + "eval_samples_per_second": 17.905, + "eval_steps_per_second": 2.238, + "eval_wer": 0.2846509529716928, + "step": 38500 + }, + { + "epoch": 26.55, + "learning_rate": 1.1813327141862086e-05, + "loss": 0.0881, + "step": 39000 + }, + { + "epoch": 26.55, + "eval_loss": 0.5125020742416382, + "eval_runtime": 113.2218, + "eval_samples_per_second": 17.523, + "eval_steps_per_second": 2.19, + "eval_wer": 0.28324123153264913, + "step": 39000 + }, + { + "epoch": 26.89, + "learning_rate": 1.0654748084513583e-05, + "loss": 0.0822, + "step": 39500 + }, + { + "epoch": 26.89, + "eval_loss": 0.5309813618659973, + "eval_runtime": 129.6599, + "eval_samples_per_second": 15.302, + "eval_steps_per_second": 1.913, + "eval_wer": 0.28634261869854516, + "step": 39500 + }, + { + "epoch": 27.23, + "learning_rate": 9.493847225446947e-06, + "loss": 0.0837, + "step": 40000 + }, + { + "epoch": 27.23, + "eval_loss": 0.5188203454017639, + "eval_runtime": 122.3408, + "eval_samples_per_second": 16.217, + "eval_steps_per_second": 2.027, + "eval_wer": 0.28312845381752566, + "step": 40000 + }, + { + "epoch": 27.57, + "learning_rate": 8.332946366380312e-06, + "loss": 0.0823, + "step": 40500 + }, + { + "epoch": 27.57, + "eval_loss": 0.5201263427734375, + "eval_runtime": 118.4812, + "eval_samples_per_second": 16.745, + "eval_steps_per_second": 2.093, + "eval_wer": 0.28132401037554977, + "step": 40500 + }, + { + "epoch": 27.91, + "learning_rate": 7.172045507313675e-06, + "loss": 0.0768, + "step": 41000 + }, + { + "epoch": 27.91, + "eval_loss": 0.5197951793670654, + "eval_runtime": 112.4253, + "eval_samples_per_second": 17.647, + "eval_steps_per_second": 2.206, + "eval_wer": 0.282169843238976, + "step": 41000 + }, + { + "epoch": 28.25, + "learning_rate": 6.013466449965173e-06, + "loss": 0.0858, + "step": 41500 + }, + { + "epoch": 28.25, + "eval_loss": 0.5245384573936462, + "eval_runtime": 128.7266, + "eval_samples_per_second": 15.413, + "eval_steps_per_second": 1.927, + "eval_wer": 0.2786737340701477, + "step": 41500 + }, + { + "epoch": 28.59, + "learning_rate": 4.8525655908985375e-06, + "loss": 0.0757, + "step": 42000 + }, + { + "epoch": 28.59, + "eval_loss": 0.5289037227630615, + "eval_runtime": 112.5429, + "eval_samples_per_second": 17.629, + "eval_steps_per_second": 2.204, + "eval_wer": 0.2766437351979249, + "step": 42000 + }, + { + "epoch": 28.93, + "learning_rate": 3.6916647318319014e-06, + "loss": 0.0728, + "step": 42500 + }, + { + "epoch": 28.93, + "eval_loss": 0.5263972282409668, + "eval_runtime": 128.0915, + "eval_samples_per_second": 15.489, + "eval_steps_per_second": 1.936, + "eval_wer": 0.2779970677794068, + "step": 42500 + }, + { + "epoch": 29.27, + "learning_rate": 2.5307638727652657e-06, + "loss": 0.0761, + "step": 43000 + }, + { + "epoch": 29.27, + "eval_loss": 0.5295293927192688, + "eval_runtime": 128.9693, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.923, + "eval_wer": 0.2770384572008571, + "step": 43000 + }, + { + "epoch": 29.61, + "learning_rate": 1.3698630136986302e-06, + "loss": 0.0739, + "step": 43500 + }, + { + "epoch": 29.61, + "eval_loss": 0.5267478823661804, + "eval_runtime": 130.4434, + "eval_samples_per_second": 15.21, + "eval_steps_per_second": 1.901, + "eval_wer": 0.2776023457764746, + "step": 43500 + } + ], + "max_steps": 44070, + "num_train_epochs": 30, + "total_flos": 1.385946883261073e+19, + "trial_name": null, + "trial_params": null +} diff --git a/wav2vec2_esp_30h/checkpoint-43500/training_args.bin b/wav2vec2_esp_30h/checkpoint-43500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..57f0270bdb34f3de2f22f9a97722ae6a8635f8cc Binary files /dev/null and b/wav2vec2_esp_30h/checkpoint-43500/training_args.bin differ diff --git a/wav2vec2_esp_30h/checkpoint-44000/config.json b/wav2vec2_esp_30h/checkpoint-44000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..647caeba97229b51ef3269ab43fbe635ecb4e3be --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-44000/config.json @@ -0,0 +1,117 @@ +{ + "_name_or_path": "facebook/wav2vec2-base", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "model_type": "wav2vec2", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 37, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.17.0", + "use_weighted_layer_sum": false, + "vocab_size": 38, + "xvector_output_dim": 512 +} diff --git a/wav2vec2_esp_30h/checkpoint-44000/optimizer.pt b/wav2vec2_esp_30h/checkpoint-44000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd18ea1b7956d2ecfdb5dd88669998dfe867e864 --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-44000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7110237357fe715569e2188e2f23724c51489b913e416952292d171c99b2fa02 +size 721723525 diff --git a/wav2vec2_esp_30h/checkpoint-44000/preprocessor_config.json b/wav2vec2_esp_30h/checkpoint-44000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73caa151574001d3d495fae897e1d38968249712 --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-44000/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wav2vec2_esp_30h/checkpoint-44000/pytorch_model.bin b/wav2vec2_esp_30h/checkpoint-44000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0efa692fe5716342d68bf5ad9eafd6212b3b87d --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-44000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc8f115c830b9237ad6fa177601ec8905b1b79caf32dde93e014e800a55d7f7 +size 377691873 diff --git a/wav2vec2_esp_30h/checkpoint-44000/rng_state.pth b/wav2vec2_esp_30h/checkpoint-44000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5069d4a5b4b69e1238998fe65f89a22565714c8 Binary files /dev/null and b/wav2vec2_esp_30h/checkpoint-44000/rng_state.pth differ diff --git a/wav2vec2_esp_30h/checkpoint-44000/scaler.pt b/wav2vec2_esp_30h/checkpoint-44000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc1060e3f5d54cd64a69a8f5625ea81acfeb4985 Binary files /dev/null and b/wav2vec2_esp_30h/checkpoint-44000/scaler.pt differ diff --git a/wav2vec2_esp_30h/checkpoint-44000/scheduler.pt b/wav2vec2_esp_30h/checkpoint-44000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c494e8b9caa35e2ccc3da17f3d88ae3b42c95da Binary files /dev/null and b/wav2vec2_esp_30h/checkpoint-44000/scheduler.pt differ diff --git a/wav2vec2_esp_30h/checkpoint-44000/trainer_state.json b/wav2vec2_esp_30h/checkpoint-44000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1839f8922902cb7b0d00b404e11204fcef4fe9fd --- /dev/null +++ b/wav2vec2_esp_30h/checkpoint-44000/trainer_state.json @@ -0,0 +1,1336 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.952348536419333, + "global_step": 44000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.34, + "learning_rate": 4.9500000000000004e-05, + "loss": 6.2203, + "step": 500 + }, + { + "epoch": 0.34, + "eval_loss": 3.002925157546997, + "eval_runtime": 132.7789, + "eval_samples_per_second": 14.942, + "eval_steps_per_second": 1.868, + "eval_wer": 1.0, + "step": 500 + }, + { + "epoch": 0.68, + "learning_rate": 9.95e-05, + "loss": 2.2102, + "step": 1000 + }, + { + "epoch": 0.68, + "eval_loss": 1.1446926593780518, + "eval_runtime": 132.5997, + "eval_samples_per_second": 14.962, + "eval_steps_per_second": 1.87, + "eval_wer": 0.8542348032028871, + "step": 1000 + }, + { + "epoch": 1.02, + "learning_rate": 9.885070814952404e-05, + "loss": 0.9714, + "step": 1500 + }, + { + "epoch": 1.02, + "eval_loss": 0.7160163521766663, + "eval_runtime": 132.7305, + "eval_samples_per_second": 14.948, + "eval_steps_per_second": 1.868, + "eval_wer": 0.6600879666177963, + "step": 1500 + }, + { + "epoch": 1.36, + "learning_rate": 9.76898072904574e-05, + "loss": 0.7541, + "step": 2000 + }, + { + "epoch": 1.36, + "eval_loss": 0.6261711120605469, + "eval_runtime": 132.7267, + "eval_samples_per_second": 14.948, + "eval_steps_per_second": 1.869, + "eval_wer": 0.594733280703733, + "step": 2000 + }, + { + "epoch": 1.7, + "learning_rate": 9.652890643139077e-05, + "loss": 0.6838, + "step": 2500 + }, + { + "epoch": 1.7, + "eval_loss": 0.5789812207221985, + "eval_runtime": 131.2782, + "eval_samples_per_second": 15.113, + "eval_steps_per_second": 1.889, + "eval_wer": 0.5645088530506372, + "step": 2500 + }, + { + "epoch": 2.04, + "learning_rate": 9.537032737404225e-05, + "loss": 0.6287, + "step": 3000 + }, + { + "epoch": 2.04, + "eval_loss": 0.6015335917472839, + "eval_runtime": 133.004, + "eval_samples_per_second": 14.917, + "eval_steps_per_second": 1.865, + "eval_wer": 0.5387955340024811, + "step": 3000 + }, + { + "epoch": 2.38, + "learning_rate": 9.420942651497563e-05, + "loss": 0.5439, + "step": 3500 + }, + { + "epoch": 2.38, + "eval_loss": 0.5301734209060669, + "eval_runtime": 133.1151, + "eval_samples_per_second": 14.904, + "eval_steps_per_second": 1.863, + "eval_wer": 0.5076124957708357, + "step": 3500 + }, + { + "epoch": 2.72, + "learning_rate": 9.304852565590899e-05, + "loss": 0.5279, + "step": 4000 + }, + { + "epoch": 2.72, + "eval_loss": 0.5215476155281067, + "eval_runtime": 131.1447, + "eval_samples_per_second": 15.128, + "eval_steps_per_second": 1.891, + "eval_wer": 0.48962445020863876, + "step": 4000 + }, + { + "epoch": 3.06, + "learning_rate": 9.188762479684235e-05, + "loss": 0.5006, + "step": 4500 + }, + { + "epoch": 3.06, + "eval_loss": 0.4860161542892456, + "eval_runtime": 133.2608, + "eval_samples_per_second": 14.888, + "eval_steps_per_second": 1.861, + "eval_wer": 0.4685350174805458, + "step": 4500 + }, + { + "epoch": 3.4, + "learning_rate": 9.072672393777572e-05, + "loss": 0.4432, + "step": 5000 + }, + { + "epoch": 3.4, + "eval_loss": 0.484553724527359, + "eval_runtime": 133.4845, + "eval_samples_per_second": 14.863, + "eval_steps_per_second": 1.858, + "eval_wer": 0.46864779519566935, + "step": 5000 + }, + { + "epoch": 3.74, + "learning_rate": 8.956582307870908e-05, + "loss": 0.4334, + "step": 5500 + }, + { + "epoch": 3.74, + "eval_loss": 0.477468878030777, + "eval_runtime": 132.8856, + "eval_samples_per_second": 14.93, + "eval_steps_per_second": 1.866, + "eval_wer": 0.45415585880230064, + "step": 5500 + }, + { + "epoch": 4.08, + "learning_rate": 8.840492221964245e-05, + "loss": 0.4292, + "step": 6000 + }, + { + "epoch": 4.08, + "eval_loss": 0.451526939868927, + "eval_runtime": 132.3623, + "eval_samples_per_second": 14.989, + "eval_steps_per_second": 1.874, + "eval_wer": 0.4291192060448855, + "step": 6000 + }, + { + "epoch": 4.42, + "learning_rate": 8.724402136057582e-05, + "loss": 0.3779, + "step": 6500 + }, + { + "epoch": 4.42, + "eval_loss": 0.4495590329170227, + "eval_runtime": 132.6275, + "eval_samples_per_second": 14.959, + "eval_steps_per_second": 1.87, + "eval_wer": 0.42054809969550017, + "step": 6500 + }, + { + "epoch": 4.77, + "learning_rate": 8.608312050150917e-05, + "loss": 0.3783, + "step": 7000 + }, + { + "epoch": 4.77, + "eval_loss": 0.45758938789367676, + "eval_runtime": 132.4666, + "eval_samples_per_second": 14.977, + "eval_steps_per_second": 1.872, + "eval_wer": 0.4184053231081538, + "step": 7000 + }, + { + "epoch": 5.11, + "learning_rate": 8.492221964244254e-05, + "loss": 0.3622, + "step": 7500 + }, + { + "epoch": 5.11, + "eval_loss": 0.4782721698284149, + "eval_runtime": 133.2094, + "eval_samples_per_second": 14.894, + "eval_steps_per_second": 1.862, + "eval_wer": 0.4070147738806812, + "step": 7500 + }, + { + "epoch": 5.45, + "learning_rate": 8.37613187833759e-05, + "loss": 0.3278, + "step": 8000 + }, + { + "epoch": 5.45, + "eval_loss": 0.44267573952674866, + "eval_runtime": 133.2221, + "eval_samples_per_second": 14.892, + "eval_steps_per_second": 1.862, + "eval_wer": 0.40278560956355025, + "step": 8000 + }, + { + "epoch": 5.79, + "learning_rate": 8.260041792430927e-05, + "loss": 0.3304, + "step": 8500 + }, + { + "epoch": 5.79, + "eval_loss": 0.4482724368572235, + "eval_runtime": 132.3395, + "eval_samples_per_second": 14.992, + "eval_steps_per_second": 1.874, + "eval_wer": 0.4056050524416375, + "step": 8500 + }, + { + "epoch": 6.13, + "learning_rate": 8.143951706524264e-05, + "loss": 0.312, + "step": 9000 + }, + { + "epoch": 6.13, + "eval_loss": 0.4750816524028778, + "eval_runtime": 132.887, + "eval_samples_per_second": 14.93, + "eval_steps_per_second": 1.866, + "eval_wer": 0.3882936731701816, + "step": 9000 + }, + { + "epoch": 6.47, + "learning_rate": 8.027861620617599e-05, + "loss": 0.29, + "step": 9500 + }, + { + "epoch": 6.47, + "eval_loss": 0.4528858959674835, + "eval_runtime": 133.211, + "eval_samples_per_second": 14.894, + "eval_steps_per_second": 1.862, + "eval_wer": 0.3780872899515056, + "step": 9500 + }, + { + "epoch": 6.81, + "learning_rate": 7.912235895054563e-05, + "loss": 0.3057, + "step": 10000 + }, + { + "epoch": 6.81, + "eval_loss": 0.5861864686012268, + "eval_runtime": 133.2534, + "eval_samples_per_second": 14.889, + "eval_steps_per_second": 1.861, + "eval_wer": 0.3783128453817526, + "step": 10000 + }, + { + "epoch": 7.15, + "learning_rate": 7.796145809147898e-05, + "loss": 0.2971, + "step": 10500 + }, + { + "epoch": 7.15, + "eval_loss": 0.43457281589508057, + "eval_runtime": 132.5985, + "eval_samples_per_second": 14.962, + "eval_steps_per_second": 1.87, + "eval_wer": 0.3765084019397767, + "step": 10500 + }, + { + "epoch": 7.49, + "learning_rate": 7.680055723241235e-05, + "loss": 0.2684, + "step": 11000 + }, + { + "epoch": 7.49, + "eval_loss": 0.4560734033584595, + "eval_runtime": 132.2037, + "eval_samples_per_second": 15.007, + "eval_steps_per_second": 1.876, + "eval_wer": 0.3732942370587572, + "step": 11000 + }, + { + "epoch": 7.83, + "learning_rate": 7.563965637334573e-05, + "loss": 0.2622, + "step": 11500 + }, + { + "epoch": 7.83, + "eval_loss": 0.43240657448768616, + "eval_runtime": 133.3517, + "eval_samples_per_second": 14.878, + "eval_steps_per_second": 1.86, + "eval_wer": 0.37408368106462164, + "step": 11500 + }, + { + "epoch": 8.17, + "learning_rate": 7.447875551427908e-05, + "loss": 0.2635, + "step": 12000 + }, + { + "epoch": 8.17, + "eval_loss": 0.4556463658809662, + "eval_runtime": 133.4279, + "eval_samples_per_second": 14.869, + "eval_steps_per_second": 1.859, + "eval_wer": 0.3789895116724935, + "step": 12000 + }, + { + "epoch": 8.51, + "learning_rate": 7.331785465521245e-05, + "loss": 0.2363, + "step": 12500 + }, + { + "epoch": 8.51, + "eval_loss": 0.47497859597206116, + "eval_runtime": 134.0404, + "eval_samples_per_second": 14.802, + "eval_steps_per_second": 1.85, + "eval_wer": 0.3709822938987256, + "step": 12500 + }, + { + "epoch": 8.85, + "learning_rate": 7.215695379614582e-05, + "loss": 0.2516, + "step": 13000 + }, + { + "epoch": 8.85, + "eval_loss": 0.4297301471233368, + "eval_runtime": 133.4288, + "eval_samples_per_second": 14.869, + "eval_steps_per_second": 1.859, + "eval_wer": 0.3635389647005752, + "step": 13000 + }, + { + "epoch": 9.19, + "learning_rate": 7.099605293707917e-05, + "loss": 0.2291, + "step": 13500 + }, + { + "epoch": 9.19, + "eval_loss": 0.4862041175365448, + "eval_runtime": 133.7259, + "eval_samples_per_second": 14.836, + "eval_steps_per_second": 1.855, + "eval_wer": 0.36021202210443215, + "step": 13500 + }, + { + "epoch": 9.53, + "learning_rate": 6.983515207801255e-05, + "loss": 0.2266, + "step": 14000 + }, + { + "epoch": 9.53, + "eval_loss": 0.4703396260738373, + "eval_runtime": 133.0987, + "eval_samples_per_second": 14.906, + "eval_steps_per_second": 1.863, + "eval_wer": 0.3546295252058193, + "step": 14000 + }, + { + "epoch": 9.87, + "learning_rate": 6.867657302066403e-05, + "loss": 0.2281, + "step": 14500 + }, + { + "epoch": 9.87, + "eval_loss": 0.42882266640663147, + "eval_runtime": 133.4534, + "eval_samples_per_second": 14.867, + "eval_steps_per_second": 1.858, + "eval_wer": 0.35812563437464756, + "step": 14500 + }, + { + "epoch": 10.21, + "learning_rate": 6.75156721615974e-05, + "loss": 0.2126, + "step": 15000 + }, + { + "epoch": 10.21, + "eval_loss": 0.4615735113620758, + "eval_runtime": 132.7985, + "eval_samples_per_second": 14.94, + "eval_steps_per_second": 1.867, + "eval_wer": 0.35626480207510997, + "step": 15000 + }, + { + "epoch": 10.55, + "learning_rate": 6.635477130253078e-05, + "loss": 0.217, + "step": 15500 + }, + { + "epoch": 10.55, + "eval_loss": 0.49455586075782776, + "eval_runtime": 133.6528, + "eval_samples_per_second": 14.844, + "eval_steps_per_second": 1.856, + "eval_wer": 0.34560730799594, + "step": 15500 + }, + { + "epoch": 10.89, + "learning_rate": 6.519387044346413e-05, + "loss": 0.212, + "step": 16000 + }, + { + "epoch": 10.89, + "eval_loss": 0.43323996663093567, + "eval_runtime": 133.4419, + "eval_samples_per_second": 14.868, + "eval_steps_per_second": 1.858, + "eval_wer": 0.3452689748505695, + "step": 16000 + }, + { + "epoch": 11.23, + "learning_rate": 6.40329695843975e-05, + "loss": 0.1986, + "step": 16500 + }, + { + "epoch": 11.23, + "eval_loss": 0.4868086576461792, + "eval_runtime": 131.4603, + "eval_samples_per_second": 15.092, + "eval_steps_per_second": 1.887, + "eval_wer": 0.3399684222397654, + "step": 16500 + }, + { + "epoch": 11.57, + "learning_rate": 6.2874390527049e-05, + "loss": 0.2012, + "step": 17000 + }, + { + "epoch": 11.57, + "eval_loss": 0.4474054276943207, + "eval_runtime": 133.9018, + "eval_samples_per_second": 14.817, + "eval_steps_per_second": 1.852, + "eval_wer": 0.3460020299988722, + "step": 17000 + }, + { + "epoch": 11.91, + "learning_rate": 6.171348966798236e-05, + "loss": 0.1998, + "step": 17500 + }, + { + "epoch": 11.91, + "eval_loss": 0.45011407136917114, + "eval_runtime": 132.7933, + "eval_samples_per_second": 14.941, + "eval_steps_per_second": 1.868, + "eval_wer": 0.3362467576406902, + "step": 17500 + }, + { + "epoch": 12.25, + "learning_rate": 6.055258880891572e-05, + "loss": 0.1746, + "step": 18000 + }, + { + "epoch": 12.25, + "eval_loss": 0.4731091558933258, + "eval_runtime": 133.9219, + "eval_samples_per_second": 14.815, + "eval_steps_per_second": 1.852, + "eval_wer": 0.33630314649825194, + "step": 18000 + }, + { + "epoch": 12.59, + "learning_rate": 5.9391687949849086e-05, + "loss": 0.1805, + "step": 18500 + }, + { + "epoch": 12.59, + "eval_loss": 0.4601946771144867, + "eval_runtime": 132.0532, + "eval_samples_per_second": 15.024, + "eval_steps_per_second": 1.878, + "eval_wer": 0.33743092364948685, + "step": 18500 + }, + { + "epoch": 12.93, + "learning_rate": 5.823310889250059e-05, + "loss": 0.1826, + "step": 19000 + }, + { + "epoch": 12.93, + "eval_loss": 0.47728002071380615, + "eval_runtime": 133.9362, + "eval_samples_per_second": 14.813, + "eval_steps_per_second": 1.852, + "eval_wer": 0.3276192624337431, + "step": 19000 + }, + { + "epoch": 13.27, + "learning_rate": 5.707220803343395e-05, + "loss": 0.1651, + "step": 19500 + }, + { + "epoch": 13.27, + "eval_loss": 0.471328467130661, + "eval_runtime": 111.9462, + "eval_samples_per_second": 17.723, + "eval_steps_per_second": 2.215, + "eval_wer": 0.3304387053118304, + "step": 19500 + }, + { + "epoch": 13.61, + "learning_rate": 5.591130717436731e-05, + "loss": 0.1812, + "step": 20000 + }, + { + "epoch": 13.61, + "eval_loss": 0.4280690550804138, + "eval_runtime": 110.8492, + "eval_samples_per_second": 17.898, + "eval_steps_per_second": 2.237, + "eval_wer": 0.32276982068343296, + "step": 20000 + }, + { + "epoch": 13.96, + "learning_rate": 5.4750406315300676e-05, + "loss": 0.1666, + "step": 20500 + }, + { + "epoch": 13.96, + "eval_loss": 0.4796316623687744, + "eval_runtime": 110.5982, + "eval_samples_per_second": 17.939, + "eval_steps_per_second": 2.242, + "eval_wer": 0.32530731927371154, + "step": 20500 + }, + { + "epoch": 14.3, + "learning_rate": 5.359182725795218e-05, + "loss": 0.1553, + "step": 21000 + }, + { + "epoch": 14.3, + "eval_loss": 0.4721369445323944, + "eval_runtime": 128.2013, + "eval_samples_per_second": 15.476, + "eval_steps_per_second": 1.934, + "eval_wer": 0.3259275967068907, + "step": 21000 + }, + { + "epoch": 14.64, + "learning_rate": 5.243092639888554e-05, + "loss": 0.1545, + "step": 21500 + }, + { + "epoch": 14.64, + "eval_loss": 0.4527774453163147, + "eval_runtime": 132.8486, + "eval_samples_per_second": 14.934, + "eval_steps_per_second": 1.867, + "eval_wer": 0.32682981842787867, + "step": 21500 + }, + { + "epoch": 14.98, + "learning_rate": 5.12700255398189e-05, + "loss": 0.1576, + "step": 22000 + }, + { + "epoch": 14.98, + "eval_loss": 0.4553755819797516, + "eval_runtime": 131.6613, + "eval_samples_per_second": 15.069, + "eval_steps_per_second": 1.884, + "eval_wer": 0.3252509304161498, + "step": 22000 + }, + { + "epoch": 15.32, + "learning_rate": 5.0109124680752265e-05, + "loss": 0.1511, + "step": 22500 + }, + { + "epoch": 15.32, + "eval_loss": 0.4580444097518921, + "eval_runtime": 134.2974, + "eval_samples_per_second": 14.773, + "eval_steps_per_second": 1.847, + "eval_wer": 0.3179203789331228, + "step": 22500 + }, + { + "epoch": 15.66, + "learning_rate": 4.895054562340376e-05, + "loss": 0.1444, + "step": 23000 + }, + { + "epoch": 15.66, + "eval_loss": 0.4659278094768524, + "eval_runtime": 134.6637, + "eval_samples_per_second": 14.733, + "eval_steps_per_second": 1.842, + "eval_wer": 0.32119093267170407, + "step": 23000 + }, + { + "epoch": 16.0, + "learning_rate": 4.7791966566055266e-05, + "loss": 0.1496, + "step": 23500 + }, + { + "epoch": 16.0, + "eval_loss": 0.4660351872444153, + "eval_runtime": 133.4382, + "eval_samples_per_second": 14.868, + "eval_steps_per_second": 1.859, + "eval_wer": 0.32598398556445246, + "step": 23500 + }, + { + "epoch": 16.34, + "learning_rate": 4.6631065706988625e-05, + "loss": 0.1327, + "step": 24000 + }, + { + "epoch": 16.34, + "eval_loss": 0.4934828281402588, + "eval_runtime": 134.3743, + "eval_samples_per_second": 14.765, + "eval_steps_per_second": 1.846, + "eval_wer": 0.31825871207849327, + "step": 24000 + }, + { + "epoch": 16.68, + "learning_rate": 4.547016484792199e-05, + "loss": 0.1535, + "step": 24500 + }, + { + "epoch": 16.68, + "eval_loss": 0.46466243267059326, + "eval_runtime": 132.9271, + "eval_samples_per_second": 14.925, + "eval_steps_per_second": 1.866, + "eval_wer": 0.3113792714559603, + "step": 24500 + }, + { + "epoch": 17.02, + "learning_rate": 4.430926398885535e-05, + "loss": 0.1438, + "step": 25000 + }, + { + "epoch": 17.02, + "eval_loss": 0.5053276419639587, + "eval_runtime": 132.7443, + "eval_samples_per_second": 14.946, + "eval_steps_per_second": 1.868, + "eval_wer": 0.31487538062478854, + "step": 25000 + }, + { + "epoch": 17.36, + "learning_rate": 4.314836312978872e-05, + "loss": 0.1264, + "step": 25500 + }, + { + "epoch": 17.36, + "eval_loss": 0.48962870240211487, + "eval_runtime": 133.9594, + "eval_samples_per_second": 14.81, + "eval_steps_per_second": 1.851, + "eval_wer": 0.3131273260403744, + "step": 25500 + }, + { + "epoch": 17.7, + "learning_rate": 4.1987462270722084e-05, + "loss": 0.1269, + "step": 26000 + }, + { + "epoch": 17.7, + "eval_loss": 0.46815410256385803, + "eval_runtime": 132.7946, + "eval_samples_per_second": 14.94, + "eval_steps_per_second": 1.868, + "eval_wer": 0.30748844028419986, + "step": 26000 + }, + { + "epoch": 18.04, + "learning_rate": 4.0826561411655444e-05, + "loss": 0.1272, + "step": 26500 + }, + { + "epoch": 18.04, + "eval_loss": 0.4759911596775055, + "eval_runtime": 132.8376, + "eval_samples_per_second": 14.936, + "eval_steps_per_second": 1.867, + "eval_wer": 0.3113792714559603, + "step": 26500 + }, + { + "epoch": 18.38, + "learning_rate": 3.966566055258881e-05, + "loss": 0.1219, + "step": 27000 + }, + { + "epoch": 18.38, + "eval_loss": 0.49612942337989807, + "eval_runtime": 133.1179, + "eval_samples_per_second": 14.904, + "eval_steps_per_second": 1.863, + "eval_wer": 0.31487538062478854, + "step": 27000 + }, + { + "epoch": 18.72, + "learning_rate": 3.850475969352218e-05, + "loss": 0.123, + "step": 27500 + }, + { + "epoch": 18.72, + "eval_loss": 0.46862220764160156, + "eval_runtime": 110.6681, + "eval_samples_per_second": 17.927, + "eval_steps_per_second": 2.241, + "eval_wer": 0.3072064959963911, + "step": 27500 + }, + { + "epoch": 19.06, + "learning_rate": 3.734385883445554e-05, + "loss": 0.1262, + "step": 28000 + }, + { + "epoch": 19.06, + "eval_loss": 0.49365413188934326, + "eval_runtime": 109.7879, + "eval_samples_per_second": 18.071, + "eval_steps_per_second": 2.259, + "eval_wer": 0.30889816172324347, + "step": 28000 + }, + { + "epoch": 19.4, + "learning_rate": 3.61829579753889e-05, + "loss": 0.1165, + "step": 28500 + }, + { + "epoch": 19.4, + "eval_loss": 0.4988892376422882, + "eval_runtime": 110.8122, + "eval_samples_per_second": 17.904, + "eval_steps_per_second": 2.238, + "eval_wer": 0.30540205255441527, + "step": 28500 + }, + { + "epoch": 19.74, + "learning_rate": 3.50243789180404e-05, + "loss": 0.1213, + "step": 29000 + }, + { + "epoch": 19.74, + "eval_loss": 0.4937494993209839, + "eval_runtime": 109.7673, + "eval_samples_per_second": 18.075, + "eval_steps_per_second": 2.259, + "eval_wer": 0.30404871997293337, + "step": 29000 + }, + { + "epoch": 20.08, + "learning_rate": 3.3863478058973766e-05, + "loss": 0.1113, + "step": 29500 + }, + { + "epoch": 20.08, + "eval_loss": 0.5031572580337524, + "eval_runtime": 111.1946, + "eval_samples_per_second": 17.843, + "eval_steps_per_second": 2.23, + "eval_wer": 0.3037667756851246, + "step": 29500 + }, + { + "epoch": 20.42, + "learning_rate": 3.270257719990713e-05, + "loss": 0.1087, + "step": 30000 + }, + { + "epoch": 20.42, + "eval_loss": 0.49494612216949463, + "eval_runtime": 126.8851, + "eval_samples_per_second": 15.636, + "eval_steps_per_second": 1.955, + "eval_wer": 0.299030111649938, + "step": 30000 + }, + { + "epoch": 20.76, + "learning_rate": 3.154167634084049e-05, + "loss": 0.1128, + "step": 30500 + }, + { + "epoch": 20.76, + "eval_loss": 0.49934807419776917, + "eval_runtime": 128.8495, + "eval_samples_per_second": 15.398, + "eval_steps_per_second": 1.925, + "eval_wer": 0.3017367768129018, + "step": 30500 + }, + { + "epoch": 21.1, + "learning_rate": 3.038077548177386e-05, + "loss": 0.1151, + "step": 31000 + }, + { + "epoch": 21.1, + "eval_loss": 0.5088058114051819, + "eval_runtime": 128.5026, + "eval_samples_per_second": 15.439, + "eval_steps_per_second": 1.93, + "eval_wer": 0.30184955452802525, + "step": 31000 + }, + { + "epoch": 21.44, + "learning_rate": 2.922219642442536e-05, + "loss": 0.1025, + "step": 31500 + }, + { + "epoch": 21.44, + "eval_loss": 0.47611942887306213, + "eval_runtime": 111.1648, + "eval_samples_per_second": 17.847, + "eval_steps_per_second": 2.231, + "eval_wer": 0.29891733393481446, + "step": 31500 + }, + { + "epoch": 21.78, + "learning_rate": 2.8063617367076856e-05, + "loss": 0.1016, + "step": 32000 + }, + { + "epoch": 21.78, + "eval_loss": 0.5126113891601562, + "eval_runtime": 121.5373, + "eval_samples_per_second": 16.324, + "eval_steps_per_second": 2.041, + "eval_wer": 0.30275177624901317, + "step": 32000 + }, + { + "epoch": 22.12, + "learning_rate": 2.690271650801022e-05, + "loss": 0.0991, + "step": 32500 + }, + { + "epoch": 22.12, + "eval_loss": 0.5214097499847412, + "eval_runtime": 129.4856, + "eval_samples_per_second": 15.322, + "eval_steps_per_second": 1.915, + "eval_wer": 0.2958159467689185, + "step": 32500 + }, + { + "epoch": 22.46, + "learning_rate": 2.574181564894358e-05, + "loss": 0.0951, + "step": 33000 + }, + { + "epoch": 22.46, + "eval_loss": 0.5233449339866638, + "eval_runtime": 130.9955, + "eval_samples_per_second": 15.146, + "eval_steps_per_second": 1.893, + "eval_wer": 0.2923198376000902, + "step": 33000 + }, + { + "epoch": 22.8, + "learning_rate": 2.4580914789876948e-05, + "loss": 0.1049, + "step": 33500 + }, + { + "epoch": 22.8, + "eval_loss": 0.48444411158561707, + "eval_runtime": 110.8951, + "eval_samples_per_second": 17.891, + "eval_steps_per_second": 2.236, + "eval_wer": 0.29677455734746816, + "step": 33500 + }, + { + "epoch": 23.14, + "learning_rate": 2.342001393081031e-05, + "loss": 0.1014, + "step": 34000 + }, + { + "epoch": 23.14, + "eval_loss": 0.4997089207172394, + "eval_runtime": 111.4563, + "eval_samples_per_second": 17.801, + "eval_steps_per_second": 2.225, + "eval_wer": 0.292376226457652, + "step": 34000 + }, + { + "epoch": 23.49, + "learning_rate": 2.2261434873461805e-05, + "loss": 0.0959, + "step": 34500 + }, + { + "epoch": 23.49, + "eval_loss": 0.48040756583213806, + "eval_runtime": 129.4236, + "eval_samples_per_second": 15.33, + "eval_steps_per_second": 1.916, + "eval_wer": 0.28978233900981165, + "step": 34500 + }, + { + "epoch": 23.83, + "learning_rate": 2.110053401439517e-05, + "loss": 0.098, + "step": 35000 + }, + { + "epoch": 23.83, + "eval_loss": 0.501395046710968, + "eval_runtime": 129.507, + "eval_samples_per_second": 15.32, + "eval_steps_per_second": 1.915, + "eval_wer": 0.291699560166911, + "step": 35000 + }, + { + "epoch": 24.17, + "learning_rate": 1.9939633155328534e-05, + "loss": 0.0973, + "step": 35500 + }, + { + "epoch": 24.17, + "eval_loss": 0.5193932056427002, + "eval_runtime": 129.3856, + "eval_samples_per_second": 15.334, + "eval_steps_per_second": 1.917, + "eval_wer": 0.28955678357956466, + "step": 35500 + }, + { + "epoch": 24.51, + "learning_rate": 1.87787322962619e-05, + "loss": 0.0876, + "step": 36000 + }, + { + "epoch": 24.51, + "eval_loss": 0.5203258395195007, + "eval_runtime": 125.0985, + "eval_samples_per_second": 15.859, + "eval_steps_per_second": 1.982, + "eval_wer": 0.28718845156197137, + "step": 36000 + }, + { + "epoch": 24.85, + "learning_rate": 1.7617831437195264e-05, + "loss": 0.0902, + "step": 36500 + }, + { + "epoch": 24.85, + "eval_loss": 0.536376953125, + "eval_runtime": 113.525, + "eval_samples_per_second": 17.476, + "eval_steps_per_second": 2.185, + "eval_wer": 0.29136122702154055, + "step": 36500 + }, + { + "epoch": 25.19, + "learning_rate": 1.645693057812863e-05, + "loss": 0.0889, + "step": 37000 + }, + { + "epoch": 25.19, + "eval_loss": 0.531440019607544, + "eval_runtime": 123.4292, + "eval_samples_per_second": 16.074, + "eval_steps_per_second": 2.009, + "eval_wer": 0.2880906732829593, + "step": 37000 + }, + { + "epoch": 25.53, + "learning_rate": 1.5296029719061993e-05, + "loss": 0.0865, + "step": 37500 + }, + { + "epoch": 25.53, + "eval_loss": 0.5107194185256958, + "eval_runtime": 129.5881, + "eval_samples_per_second": 15.31, + "eval_steps_per_second": 1.914, + "eval_wer": 0.285271230404872, + "step": 37500 + }, + { + "epoch": 25.87, + "learning_rate": 1.4135128859995356e-05, + "loss": 0.0859, + "step": 38000 + }, + { + "epoch": 25.87, + "eval_loss": 0.5254319310188293, + "eval_runtime": 128.9308, + "eval_samples_per_second": 15.388, + "eval_steps_per_second": 1.924, + "eval_wer": 0.28780872899515053, + "step": 38000 + }, + { + "epoch": 26.21, + "learning_rate": 1.2974228000928721e-05, + "loss": 0.0813, + "step": 38500 + }, + { + "epoch": 26.21, + "eval_loss": 0.5275471806526184, + "eval_runtime": 110.8092, + "eval_samples_per_second": 17.905, + "eval_steps_per_second": 2.238, + "eval_wer": 0.2846509529716928, + "step": 38500 + }, + { + "epoch": 26.55, + "learning_rate": 1.1813327141862086e-05, + "loss": 0.0881, + "step": 39000 + }, + { + "epoch": 26.55, + "eval_loss": 0.5125020742416382, + "eval_runtime": 113.2218, + "eval_samples_per_second": 17.523, + "eval_steps_per_second": 2.19, + "eval_wer": 0.28324123153264913, + "step": 39000 + }, + { + "epoch": 26.89, + "learning_rate": 1.0654748084513583e-05, + "loss": 0.0822, + "step": 39500 + }, + { + "epoch": 26.89, + "eval_loss": 0.5309813618659973, + "eval_runtime": 129.6599, + "eval_samples_per_second": 15.302, + "eval_steps_per_second": 1.913, + "eval_wer": 0.28634261869854516, + "step": 39500 + }, + { + "epoch": 27.23, + "learning_rate": 9.493847225446947e-06, + "loss": 0.0837, + "step": 40000 + }, + { + "epoch": 27.23, + "eval_loss": 0.5188203454017639, + "eval_runtime": 122.3408, + "eval_samples_per_second": 16.217, + "eval_steps_per_second": 2.027, + "eval_wer": 0.28312845381752566, + "step": 40000 + }, + { + "epoch": 27.57, + "learning_rate": 8.332946366380312e-06, + "loss": 0.0823, + "step": 40500 + }, + { + "epoch": 27.57, + "eval_loss": 0.5201263427734375, + "eval_runtime": 118.4812, + "eval_samples_per_second": 16.745, + "eval_steps_per_second": 2.093, + "eval_wer": 0.28132401037554977, + "step": 40500 + }, + { + "epoch": 27.91, + "learning_rate": 7.172045507313675e-06, + "loss": 0.0768, + "step": 41000 + }, + { + "epoch": 27.91, + "eval_loss": 0.5197951793670654, + "eval_runtime": 112.4253, + "eval_samples_per_second": 17.647, + "eval_steps_per_second": 2.206, + "eval_wer": 0.282169843238976, + "step": 41000 + }, + { + "epoch": 28.25, + "learning_rate": 6.013466449965173e-06, + "loss": 0.0858, + "step": 41500 + }, + { + "epoch": 28.25, + "eval_loss": 0.5245384573936462, + "eval_runtime": 128.7266, + "eval_samples_per_second": 15.413, + "eval_steps_per_second": 1.927, + "eval_wer": 0.2786737340701477, + "step": 41500 + }, + { + "epoch": 28.59, + "learning_rate": 4.8525655908985375e-06, + "loss": 0.0757, + "step": 42000 + }, + { + "epoch": 28.59, + "eval_loss": 0.5289037227630615, + "eval_runtime": 112.5429, + "eval_samples_per_second": 17.629, + "eval_steps_per_second": 2.204, + "eval_wer": 0.2766437351979249, + "step": 42000 + }, + { + "epoch": 28.93, + "learning_rate": 3.6916647318319014e-06, + "loss": 0.0728, + "step": 42500 + }, + { + "epoch": 28.93, + "eval_loss": 0.5263972282409668, + "eval_runtime": 128.0915, + "eval_samples_per_second": 15.489, + "eval_steps_per_second": 1.936, + "eval_wer": 0.2779970677794068, + "step": 42500 + }, + { + "epoch": 29.27, + "learning_rate": 2.5307638727652657e-06, + "loss": 0.0761, + "step": 43000 + }, + { + "epoch": 29.27, + "eval_loss": 0.5295293927192688, + "eval_runtime": 128.9693, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.923, + "eval_wer": 0.2770384572008571, + "step": 43000 + }, + { + "epoch": 29.61, + "learning_rate": 1.3698630136986302e-06, + "loss": 0.0739, + "step": 43500 + }, + { + "epoch": 29.61, + "eval_loss": 0.5267478823661804, + "eval_runtime": 130.4434, + "eval_samples_per_second": 15.21, + "eval_steps_per_second": 1.901, + "eval_wer": 0.2776023457764746, + "step": 43500 + }, + { + "epoch": 29.95, + "learning_rate": 2.0896215463199446e-07, + "loss": 0.0734, + "step": 44000 + }, + { + "epoch": 29.95, + "eval_loss": 0.5276228189468384, + "eval_runtime": 110.9362, + "eval_samples_per_second": 17.884, + "eval_steps_per_second": 2.236, + "eval_wer": 0.2766437351979249, + "step": 44000 + } + ], + "max_steps": 44070, + "num_train_epochs": 30, + "total_flos": 1.401945817013882e+19, + "trial_name": null, + "trial_params": null +} diff --git a/wav2vec2_esp_30h/checkpoint-44000/training_args.bin b/wav2vec2_esp_30h/checkpoint-44000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..57f0270bdb34f3de2f22f9a97722ae6a8635f8cc Binary files /dev/null and b/wav2vec2_esp_30h/checkpoint-44000/training_args.bin differ diff --git a/wav2vec2_esp_30h/runs/Mar08_02-48-55_mint/1678211683.9648502/events.out.tfevents.1678211683.mint b/wav2vec2_esp_30h/runs/Mar08_02-48-55_mint/1678211683.9648502/events.out.tfevents.1678211683.mint new file mode 100644 index 0000000000000000000000000000000000000000..e56ed879ecfadbdb2f9330d44fca6495e4a04b44 Binary files /dev/null and b/wav2vec2_esp_30h/runs/Mar08_02-48-55_mint/1678211683.9648502/events.out.tfevents.1678211683.mint differ diff --git a/wav2vec2_esp_30h/runs/Mar08_02-48-55_mint/events.out.tfevents.1678211683.mint b/wav2vec2_esp_30h/runs/Mar08_02-48-55_mint/events.out.tfevents.1678211683.mint new file mode 100644 index 0000000000000000000000000000000000000000..00caa6d8a45a5226b547ffc2143173e6c8ef2003 Binary files /dev/null and b/wav2vec2_esp_30h/runs/Mar08_02-48-55_mint/events.out.tfevents.1678211683.mint differ diff --git a/wav2vec2_esp_30h/runs/Mar08_03-58-32_mint/1678215863.173738/events.out.tfevents.1678215863.mint b/wav2vec2_esp_30h/runs/Mar08_03-58-32_mint/1678215863.173738/events.out.tfevents.1678215863.mint new file mode 100644 index 0000000000000000000000000000000000000000..247002bdb32775f5b343571119cfc04fe4fe620e Binary files /dev/null and b/wav2vec2_esp_30h/runs/Mar08_03-58-32_mint/1678215863.173738/events.out.tfevents.1678215863.mint differ diff --git a/wav2vec2_esp_30h/runs/Mar08_03-58-32_mint/events.out.tfevents.1678215863.mint b/wav2vec2_esp_30h/runs/Mar08_03-58-32_mint/events.out.tfevents.1678215863.mint new file mode 100644 index 0000000000000000000000000000000000000000..c6b6cd08833638841e66e583cc4fcb9b6ad119a1 Binary files /dev/null and b/wav2vec2_esp_30h/runs/Mar08_03-58-32_mint/events.out.tfevents.1678215863.mint differ diff --git a/wav2vec2_esp_5h/checkpoint-8000/config.json b/wav2vec2_esp_5h/checkpoint-8000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..647caeba97229b51ef3269ab43fbe635ecb4e3be --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8000/config.json @@ -0,0 +1,117 @@ +{ + "_name_or_path": "facebook/wav2vec2-base", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "model_type": "wav2vec2", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 37, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.17.0", + "use_weighted_layer_sum": false, + "vocab_size": 38, + "xvector_output_dim": 512 +} diff --git a/wav2vec2_esp_5h/checkpoint-8000/optimizer.pt b/wav2vec2_esp_5h/checkpoint-8000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c6c60e811878d2a4d45445d244f542bc549b876 --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42646df58671960375957cbd8f37fd7a40c3231761fa6deec20b76aab5d09d31 +size 721723525 diff --git a/wav2vec2_esp_5h/checkpoint-8000/preprocessor_config.json b/wav2vec2_esp_5h/checkpoint-8000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73caa151574001d3d495fae897e1d38968249712 --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8000/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wav2vec2_esp_5h/checkpoint-8000/pytorch_model.bin b/wav2vec2_esp_5h/checkpoint-8000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..048bb0291c49434cd1435b22a0f694bd371a7ea7 --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce65b43cf3bfb9430a3d7c78e0144ff466f29c79cf64d87f6c644ef8cc7b74bf +size 377691873 diff --git a/wav2vec2_esp_5h/checkpoint-8000/rng_state.pth b/wav2vec2_esp_5h/checkpoint-8000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..76176708a3697af0ff16a3e0b3acd011079dbe1e Binary files /dev/null and b/wav2vec2_esp_5h/checkpoint-8000/rng_state.pth differ diff --git a/wav2vec2_esp_5h/checkpoint-8000/scaler.pt b/wav2vec2_esp_5h/checkpoint-8000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..582d541222fc485bae190a0033e7d45dbd9f58e7 Binary files /dev/null and b/wav2vec2_esp_5h/checkpoint-8000/scaler.pt differ diff --git a/wav2vec2_esp_5h/checkpoint-8000/scheduler.pt b/wav2vec2_esp_5h/checkpoint-8000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e09076a4264d4b01ec7399b93827add38dd716c Binary files /dev/null and b/wav2vec2_esp_5h/checkpoint-8000/scheduler.pt differ diff --git a/wav2vec2_esp_5h/checkpoint-8000/trainer_state.json b/wav2vec2_esp_5h/checkpoint-8000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..88b10d6eece16a473d27987a692f1771c83241fa --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8000/trainer_state.json @@ -0,0 +1,256 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.11864406779661, + "global_step": 8000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.69, + "learning_rate": 4.94e-05, + "loss": 6.7713, + "step": 500 + }, + { + "epoch": 1.69, + "eval_loss": 2.9682114124298096, + "eval_runtime": 24.5806, + "eval_samples_per_second": 15.663, + "eval_steps_per_second": 1.993, + "eval_wer": 1.0, + "step": 500 + }, + { + "epoch": 3.39, + "learning_rate": 9.94e-05, + "loss": 1.7717, + "step": 1000 + }, + { + "epoch": 3.39, + "eval_loss": 1.053725004196167, + "eval_runtime": 25.0326, + "eval_samples_per_second": 15.38, + "eval_steps_per_second": 1.957, + "eval_wer": 0.7996899224806202, + "step": 1000 + }, + { + "epoch": 5.08, + "learning_rate": 9.370700636942676e-05, + "loss": 0.8188, + "step": 1500 + }, + { + "epoch": 5.08, + "eval_loss": 0.723295271396637, + "eval_runtime": 24.8888, + "eval_samples_per_second": 15.469, + "eval_steps_per_second": 1.969, + "eval_wer": 0.6505426356589147, + "step": 1500 + }, + { + "epoch": 6.78, + "learning_rate": 8.73375796178344e-05, + "loss": 0.5633, + "step": 2000 + }, + { + "epoch": 6.78, + "eval_loss": 0.6577075719833374, + "eval_runtime": 24.4626, + "eval_samples_per_second": 15.738, + "eval_steps_per_second": 2.003, + "eval_wer": 0.6083720930232558, + "step": 2000 + }, + { + "epoch": 8.47, + "learning_rate": 8.096815286624205e-05, + "loss": 0.4201, + "step": 2500 + }, + { + "epoch": 8.47, + "eval_loss": 0.6801705360412598, + "eval_runtime": 23.9758, + "eval_samples_per_second": 16.058, + "eval_steps_per_second": 2.044, + "eval_wer": 0.5937984496124031, + "step": 2500 + }, + { + "epoch": 10.17, + "learning_rate": 7.459872611464968e-05, + "loss": 0.3465, + "step": 3000 + }, + { + "epoch": 10.17, + "eval_loss": 0.734240710735321, + "eval_runtime": 24.3607, + "eval_samples_per_second": 15.804, + "eval_steps_per_second": 2.011, + "eval_wer": 0.5792248062015504, + "step": 3000 + }, + { + "epoch": 11.86, + "learning_rate": 6.822929936305733e-05, + "loss": 0.2812, + "step": 3500 + }, + { + "epoch": 11.86, + "eval_loss": 0.654410183429718, + "eval_runtime": 25.1886, + "eval_samples_per_second": 15.285, + "eval_steps_per_second": 1.945, + "eval_wer": 0.56, + "step": 3500 + }, + { + "epoch": 13.56, + "learning_rate": 6.185987261146497e-05, + "loss": 0.2362, + "step": 4000 + }, + { + "epoch": 13.56, + "eval_loss": 0.6740626692771912, + "eval_runtime": 24.8977, + "eval_samples_per_second": 15.463, + "eval_steps_per_second": 1.968, + "eval_wer": 0.5311627906976745, + "step": 4000 + }, + { + "epoch": 15.25, + "learning_rate": 5.549044585987262e-05, + "loss": 0.2042, + "step": 4500 + }, + { + "epoch": 15.25, + "eval_loss": 0.7329800724983215, + "eval_runtime": 24.5766, + "eval_samples_per_second": 15.665, + "eval_steps_per_second": 1.994, + "eval_wer": 0.5221705426356589, + "step": 4500 + }, + { + "epoch": 16.95, + "learning_rate": 4.912101910828026e-05, + "loss": 0.1881, + "step": 5000 + }, + { + "epoch": 16.95, + "eval_loss": 0.7085126042366028, + "eval_runtime": 25.0265, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.958, + "eval_wer": 0.5184496124031007, + "step": 5000 + }, + { + "epoch": 18.64, + "learning_rate": 4.27515923566879e-05, + "loss": 0.1632, + "step": 5500 + }, + { + "epoch": 18.64, + "eval_loss": 0.6873669624328613, + "eval_runtime": 24.3859, + "eval_samples_per_second": 15.788, + "eval_steps_per_second": 2.009, + "eval_wer": 0.5255813953488372, + "step": 5500 + }, + { + "epoch": 20.34, + "learning_rate": 3.638216560509554e-05, + "loss": 0.1502, + "step": 6000 + }, + { + "epoch": 20.34, + "eval_loss": 0.7731661200523376, + "eval_runtime": 25.086, + "eval_samples_per_second": 15.347, + "eval_steps_per_second": 1.953, + "eval_wer": 0.5193798449612403, + "step": 6000 + }, + { + "epoch": 22.03, + "learning_rate": 3.0012738853503187e-05, + "loss": 0.1338, + "step": 6500 + }, + { + "epoch": 22.03, + "eval_loss": 0.7070124745368958, + "eval_runtime": 24.9391, + "eval_samples_per_second": 15.438, + "eval_steps_per_second": 1.965, + "eval_wer": 0.5041860465116279, + "step": 6500 + }, + { + "epoch": 23.73, + "learning_rate": 2.3656050955414013e-05, + "loss": 0.1295, + "step": 7000 + }, + { + "epoch": 23.73, + "eval_loss": 0.7655993103981018, + "eval_runtime": 24.8534, + "eval_samples_per_second": 15.491, + "eval_steps_per_second": 1.972, + "eval_wer": 0.4951937984496124, + "step": 7000 + }, + { + "epoch": 25.42, + "learning_rate": 1.7286624203821657e-05, + "loss": 0.1143, + "step": 7500 + }, + { + "epoch": 25.42, + "eval_loss": 0.7407109141349792, + "eval_runtime": 24.926, + "eval_samples_per_second": 15.446, + "eval_steps_per_second": 1.966, + "eval_wer": 0.4951937984496124, + "step": 7500 + }, + { + "epoch": 27.12, + "learning_rate": 1.09171974522293e-05, + "loss": 0.104, + "step": 8000 + }, + { + "epoch": 27.12, + "eval_loss": 0.7474448680877686, + "eval_runtime": 25.1716, + "eval_samples_per_second": 15.295, + "eval_steps_per_second": 1.947, + "eval_wer": 0.4846511627906977, + "step": 8000 + } + ], + "max_steps": 8850, + "num_train_epochs": 30, + "total_flos": 2.5455023655397975e+18, + "trial_name": null, + "trial_params": null +} diff --git a/wav2vec2_esp_5h/checkpoint-8000/training_args.bin b/wav2vec2_esp_5h/checkpoint-8000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..32c3e368cba4588bfa2535cdc8cdc42bcf6b164c Binary files /dev/null and b/wav2vec2_esp_5h/checkpoint-8000/training_args.bin differ diff --git a/wav2vec2_esp_5h/checkpoint-8500/config.json b/wav2vec2_esp_5h/checkpoint-8500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..647caeba97229b51ef3269ab43fbe635ecb4e3be --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8500/config.json @@ -0,0 +1,117 @@ +{ + "_name_or_path": "facebook/wav2vec2-base", + "activation_dropout": 0.0, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "freeze_feat_extract_train": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "model_type": "wav2vec2", + "no_mask_channel_overlap": false, + "no_mask_time_overlap": false, + "num_adapter_layers": 3, + "num_attention_heads": 12, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 12, + "num_negatives": 100, + "output_hidden_size": 768, + "pad_token_id": 37, + "proj_codevector_dim": 256, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.17.0", + "use_weighted_layer_sum": false, + "vocab_size": 38, + "xvector_output_dim": 512 +} diff --git a/wav2vec2_esp_5h/checkpoint-8500/optimizer.pt b/wav2vec2_esp_5h/checkpoint-8500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb804b2ba5f166fe3f31261f4a0ca369d7f34481 --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b793944ec260409c61fbfbba8ea55f5c4d93048f58afb0fceb453de00d458b2 +size 721723525 diff --git a/wav2vec2_esp_5h/checkpoint-8500/preprocessor_config.json b/wav2vec2_esp_5h/checkpoint-8500/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..73caa151574001d3d495fae897e1d38968249712 --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8500/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/wav2vec2_esp_5h/checkpoint-8500/pytorch_model.bin b/wav2vec2_esp_5h/checkpoint-8500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f7f7d78cce7aeb04353ce4abb8159e4d50e9cad --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5743a94968427bab85cc6faf81a243b0035265ad5de5e4223e91239ef239ef +size 377691873 diff --git a/wav2vec2_esp_5h/checkpoint-8500/rng_state.pth b/wav2vec2_esp_5h/checkpoint-8500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..714c02b05fd5737bfd970a1e9dca4705988ddd0f Binary files /dev/null and b/wav2vec2_esp_5h/checkpoint-8500/rng_state.pth differ diff --git a/wav2vec2_esp_5h/checkpoint-8500/scaler.pt b/wav2vec2_esp_5h/checkpoint-8500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e5963d76293475fbed57cafbaac2696eaa5003e Binary files /dev/null and b/wav2vec2_esp_5h/checkpoint-8500/scaler.pt differ diff --git a/wav2vec2_esp_5h/checkpoint-8500/scheduler.pt b/wav2vec2_esp_5h/checkpoint-8500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d839e0225a070d37200ef0154345abc2da5710ea Binary files /dev/null and b/wav2vec2_esp_5h/checkpoint-8500/scheduler.pt differ diff --git a/wav2vec2_esp_5h/checkpoint-8500/trainer_state.json b/wav2vec2_esp_5h/checkpoint-8500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..25472eaa940edabe5415847e9ea2cf9762e6d80b --- /dev/null +++ b/wav2vec2_esp_5h/checkpoint-8500/trainer_state.json @@ -0,0 +1,271 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 28.8135593220339, + "global_step": 8500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.69, + "learning_rate": 4.94e-05, + "loss": 6.7713, + "step": 500 + }, + { + "epoch": 1.69, + "eval_loss": 2.9682114124298096, + "eval_runtime": 24.5806, + "eval_samples_per_second": 15.663, + "eval_steps_per_second": 1.993, + "eval_wer": 1.0, + "step": 500 + }, + { + "epoch": 3.39, + "learning_rate": 9.94e-05, + "loss": 1.7717, + "step": 1000 + }, + { + "epoch": 3.39, + "eval_loss": 1.053725004196167, + "eval_runtime": 25.0326, + "eval_samples_per_second": 15.38, + "eval_steps_per_second": 1.957, + "eval_wer": 0.7996899224806202, + "step": 1000 + }, + { + "epoch": 5.08, + "learning_rate": 9.370700636942676e-05, + "loss": 0.8188, + "step": 1500 + }, + { + "epoch": 5.08, + "eval_loss": 0.723295271396637, + "eval_runtime": 24.8888, + "eval_samples_per_second": 15.469, + "eval_steps_per_second": 1.969, + "eval_wer": 0.6505426356589147, + "step": 1500 + }, + { + "epoch": 6.78, + "learning_rate": 8.73375796178344e-05, + "loss": 0.5633, + "step": 2000 + }, + { + "epoch": 6.78, + "eval_loss": 0.6577075719833374, + "eval_runtime": 24.4626, + "eval_samples_per_second": 15.738, + "eval_steps_per_second": 2.003, + "eval_wer": 0.6083720930232558, + "step": 2000 + }, + { + "epoch": 8.47, + "learning_rate": 8.096815286624205e-05, + "loss": 0.4201, + "step": 2500 + }, + { + "epoch": 8.47, + "eval_loss": 0.6801705360412598, + "eval_runtime": 23.9758, + "eval_samples_per_second": 16.058, + "eval_steps_per_second": 2.044, + "eval_wer": 0.5937984496124031, + "step": 2500 + }, + { + "epoch": 10.17, + "learning_rate": 7.459872611464968e-05, + "loss": 0.3465, + "step": 3000 + }, + { + "epoch": 10.17, + "eval_loss": 0.734240710735321, + "eval_runtime": 24.3607, + "eval_samples_per_second": 15.804, + "eval_steps_per_second": 2.011, + "eval_wer": 0.5792248062015504, + "step": 3000 + }, + { + "epoch": 11.86, + "learning_rate": 6.822929936305733e-05, + "loss": 0.2812, + "step": 3500 + }, + { + "epoch": 11.86, + "eval_loss": 0.654410183429718, + "eval_runtime": 25.1886, + "eval_samples_per_second": 15.285, + "eval_steps_per_second": 1.945, + "eval_wer": 0.56, + "step": 3500 + }, + { + "epoch": 13.56, + "learning_rate": 6.185987261146497e-05, + "loss": 0.2362, + "step": 4000 + }, + { + "epoch": 13.56, + "eval_loss": 0.6740626692771912, + "eval_runtime": 24.8977, + "eval_samples_per_second": 15.463, + "eval_steps_per_second": 1.968, + "eval_wer": 0.5311627906976745, + "step": 4000 + }, + { + "epoch": 15.25, + "learning_rate": 5.549044585987262e-05, + "loss": 0.2042, + "step": 4500 + }, + { + "epoch": 15.25, + "eval_loss": 0.7329800724983215, + "eval_runtime": 24.5766, + "eval_samples_per_second": 15.665, + "eval_steps_per_second": 1.994, + "eval_wer": 0.5221705426356589, + "step": 4500 + }, + { + "epoch": 16.95, + "learning_rate": 4.912101910828026e-05, + "loss": 0.1881, + "step": 5000 + }, + { + "epoch": 16.95, + "eval_loss": 0.7085126042366028, + "eval_runtime": 25.0265, + "eval_samples_per_second": 15.384, + "eval_steps_per_second": 1.958, + "eval_wer": 0.5184496124031007, + "step": 5000 + }, + { + "epoch": 18.64, + "learning_rate": 4.27515923566879e-05, + "loss": 0.1632, + "step": 5500 + }, + { + "epoch": 18.64, + "eval_loss": 0.6873669624328613, + "eval_runtime": 24.3859, + "eval_samples_per_second": 15.788, + "eval_steps_per_second": 2.009, + "eval_wer": 0.5255813953488372, + "step": 5500 + }, + { + "epoch": 20.34, + "learning_rate": 3.638216560509554e-05, + "loss": 0.1502, + "step": 6000 + }, + { + "epoch": 20.34, + "eval_loss": 0.7731661200523376, + "eval_runtime": 25.086, + "eval_samples_per_second": 15.347, + "eval_steps_per_second": 1.953, + "eval_wer": 0.5193798449612403, + "step": 6000 + }, + { + "epoch": 22.03, + "learning_rate": 3.0012738853503187e-05, + "loss": 0.1338, + "step": 6500 + }, + { + "epoch": 22.03, + "eval_loss": 0.7070124745368958, + "eval_runtime": 24.9391, + "eval_samples_per_second": 15.438, + "eval_steps_per_second": 1.965, + "eval_wer": 0.5041860465116279, + "step": 6500 + }, + { + "epoch": 23.73, + "learning_rate": 2.3656050955414013e-05, + "loss": 0.1295, + "step": 7000 + }, + { + "epoch": 23.73, + "eval_loss": 0.7655993103981018, + "eval_runtime": 24.8534, + "eval_samples_per_second": 15.491, + "eval_steps_per_second": 1.972, + "eval_wer": 0.4951937984496124, + "step": 7000 + }, + { + "epoch": 25.42, + "learning_rate": 1.7286624203821657e-05, + "loss": 0.1143, + "step": 7500 + }, + { + "epoch": 25.42, + "eval_loss": 0.7407109141349792, + "eval_runtime": 24.926, + "eval_samples_per_second": 15.446, + "eval_steps_per_second": 1.966, + "eval_wer": 0.4951937984496124, + "step": 7500 + }, + { + "epoch": 27.12, + "learning_rate": 1.09171974522293e-05, + "loss": 0.104, + "step": 8000 + }, + { + "epoch": 27.12, + "eval_loss": 0.7474448680877686, + "eval_runtime": 25.1716, + "eval_samples_per_second": 15.295, + "eval_steps_per_second": 1.947, + "eval_wer": 0.4846511627906977, + "step": 8000 + }, + { + "epoch": 28.81, + "learning_rate": 4.5477707006369424e-06, + "loss": 0.1077, + "step": 8500 + }, + { + "epoch": 28.81, + "eval_loss": 0.7391781210899353, + "eval_runtime": 24.7928, + "eval_samples_per_second": 15.529, + "eval_steps_per_second": 1.976, + "eval_wer": 0.47968992248062015, + "step": 8500 + } + ], + "max_steps": 8850, + "num_train_epochs": 30, + "total_flos": 2.7043654893523635e+18, + "trial_name": null, + "trial_params": null +} diff --git a/wav2vec2_esp_5h/checkpoint-8500/training_args.bin b/wav2vec2_esp_5h/checkpoint-8500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..32c3e368cba4588bfa2535cdc8cdc42bcf6b164c Binary files /dev/null and b/wav2vec2_esp_5h/checkpoint-8500/training_args.bin differ diff --git a/wav2vec2_esp_5h/runs/Mar08_08-38-40_mint/1678232392.915689/events.out.tfevents.1678232392.mint b/wav2vec2_esp_5h/runs/Mar08_08-38-40_mint/1678232392.915689/events.out.tfevents.1678232392.mint new file mode 100644 index 0000000000000000000000000000000000000000..64316ba8649a349c63dcba8e808590bc28e12dde Binary files /dev/null and b/wav2vec2_esp_5h/runs/Mar08_08-38-40_mint/1678232392.915689/events.out.tfevents.1678232392.mint differ diff --git a/wav2vec2_esp_5h/runs/Mar08_08-38-40_mint/events.out.tfevents.1678232392.mint b/wav2vec2_esp_5h/runs/Mar08_08-38-40_mint/events.out.tfevents.1678232392.mint new file mode 100644 index 0000000000000000000000000000000000000000..d3198f07704b0f38c55ee2057fc75fd3c69f33b8 Binary files /dev/null and b/wav2vec2_esp_5h/runs/Mar08_08-38-40_mint/events.out.tfevents.1678232392.mint differ