Training in progress, step 200

Browse files

Files changed (10) hide show

added_tokens.json +4 -0
breeze-listen-w2v2-ml.log +121 -0
config.json +108 -0
model.safetensors +3 -0
preprocessor_config.json +10 -0
special_tokens_map.json +30 -0
tokenizer_config.json +48 -0
train-ctc-model.sh +135 -0
training_args.bin +3 -0
vocab.json +76 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 73,
+  "<s>": 72
+}

breeze-listen-w2v2-ml.log ADDED Viewed

	@@ -0,0 +1,121 @@

+01/29/2024 19:52:08 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: True
+01/29/2024 19:52:08 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
+_n_gpu=1,
+adafactor=False,
+adam_beta1=0.9,
+adam_beta2=0.999,
+adam_epsilon=1e-08,
+auto_find_batch_size=False,
+bf16=False,
+bf16_full_eval=False,
+data_seed=None,
+dataloader_drop_last=False,
+dataloader_num_workers=0,
+dataloader_persistent_workers=False,
+dataloader_pin_memory=True,
+dataloader_prefetch_factor=None,
+ddp_backend=None,
+ddp_broadcast_buffers=None,
+ddp_bucket_cap_mb=None,
+ddp_find_unused_parameters=None,
+ddp_timeout=1800,
+debug=[],
+deepspeed=None,
+disable_tqdm=False,
+dispatch_batches=None,
+do_eval=True,
+do_predict=False,
+do_train=True,
+eval_accumulation_steps=None,
+eval_delay=0,
+eval_steps=200,
+evaluation_strategy=IntervalStrategy.STEPS,
+fp16=True,
+fp16_backend=auto,
+fp16_full_eval=False,
+fp16_opt_level=O1,
+fsdp=[],
+fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
+fsdp_min_num_params=0,
+fsdp_transformer_layer_cls_to_wrap=None,
+full_determinism=False,
+gradient_accumulation_steps=1,
+gradient_checkpointing=True,
+gradient_checkpointing_kwargs=None,
+greater_is_better=None,
+group_by_length=True,
+half_precision_backend=auto,
+hub_always_push=False,
+hub_model_id=simpragma/breeze-listen-w2v2-ml,
+hub_private_repo=False,
+hub_strategy=HubStrategy.EVERY_SAVE,
+hub_token=<HUB_TOKEN>,
+ignore_data_skip=False,
+include_inputs_for_metrics=False,
+include_num_input_tokens_seen=False,
+include_tokens_per_second=False,
+jit_mode_eval=False,
+label_names=None,
+label_smoothing_factor=0.0,
+learning_rate=0.001,
+length_column_name=input_length,
+load_best_model_at_end=False,
+local_rank=0,
+log_level=passive,
+log_level_replica=warning,
+log_on_each_node=True,
+logging_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-ml/runs/Jan29_19-52-08_knight,
+logging_first_step=False,
+logging_nan_inf_filter=True,
+logging_steps=500,
+logging_strategy=IntervalStrategy.STEPS,
+lr_scheduler_kwargs={},
+lr_scheduler_type=SchedulerType.LINEAR,
+max_grad_norm=1.0,
+max_steps=-1,
+metric_for_best_model=None,
+mp_parameters=,
+neftune_noise_alpha=None,
+no_cuda=False,
+num_train_epochs=4.0,
+optim=OptimizerNames.ADAMW_BNB,
+optim_args=None,
+output_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-ml,
+overwrite_output_dir=True,
+past_index=-1,
+per_device_eval_batch_size=8,
+per_device_train_batch_size=4,
+prediction_loss_only=False,
+push_to_hub=True,
+push_to_hub_model_id=None,
+push_to_hub_organization=None,
+push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
+ray_scope=last,
+remove_unused_columns=True,
+report_to=[],
+resume_from_checkpoint=None,
+run_name=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-ml,
+save_on_each_node=False,
+save_only_model=False,
+save_safetensors=True,
+save_steps=200,
+save_strategy=IntervalStrategy.STEPS,
+save_total_limit=3,
+seed=42,
+skip_memory_metrics=True,
+split_batches=False,
+tf32=None,
+torch_compile=False,
+torch_compile_backend=None,
+torch_compile_mode=None,
+torchdynamo=None,
+tpu_metrics_debug=False,
+tpu_num_cores=None,
+use_cpu=False,
+use_ipex=False,
+use_legacy_prediction_loop=False,
+use_mps_device=False,
+warmup_ratio=0.0,
+warmup_steps=100,
+weight_decay=0.0,
+)

config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_name_or_path": "facebook/mms-1b-all",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": 16,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.05,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 1024,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.05,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.05,
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 48,
+  "num_negatives": 100,
+  "output_hidden_size": 1280,
+  "pad_token_id": 71,
+  "proj_codevector_dim": 1024,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.0.dev0",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 74,
+  "xvector_output_dim": 512
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8646b820c7003d6bcb643ef8b492cd503c3de511327cb68c36f2fb036f8d2b3
+size 3859111256

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "added_tokens_decoder": {
+    "70": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "71": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "72": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "73": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": "mal",
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

train-ctc-model.sh ADDED Viewed

	@@ -0,0 +1,135 @@

+#! /usr/bin/bash
+#
+# This script runs the speech recognition training using DeepSpeed
+#
+# CHANGE THESE AS PER YOUR REQUIREMENTS
+# LANG as it is referred in the dataset
+LANG=ml			# 2 letter ISO code for the language
+LANG_ISO_3=mal		# 3 letter ISO code for the language
+LANGUAGE=Malayalam	# Full language name as per Whisper convention
+# For Mozilla Commonvoice datasets, uncomment the following
+DATASET="mozilla-foundation/common_voice_16_0"
+TEXT_COLUMN="sentence"
+# For Google Fleurs datasets, uncomment the following
+# DATASET="google/fleurs"
+# TEXT_COLUMN="transcription"
+# Custom datasets
+#DATASET="parambharat/kannada_asr_corpus"
+#TEXT_COLUMN=${TEXT_COLUMN:-"sentence"}
+# Function to get fine tuning learning rate
+get_fine_tuning_lr() {
+    local model_size=$1
+    local lr
+    case $model_size in
+        "tiny")
+            lr="3.75e-5"
+            ;;
+        "base")
+            lr="2.5e-5"
+            ;;
+        "small")
+            lr="1.25e-5"
+            ;;
+        "medium")
+            lr="6.25e-6"
+            ;;
+        "large")
+            lr="4.375e-6"
+            ;;
+        "large-v2")
+            lr="5e-6"
+            ;;
+        *)
+            echo "Invalid model size"
+            exit 1
+            ;;
+    esac
+    echo $lr
+}
+SCRIPT_PATH=$(realpath "${BASH_SOURCE[0]}")
+SCRIPT_DIR=$(realpath $(dirname "${BASH_SOURCE[0]}"))
+# Port to use
+export MASTER_PORT="${MASTER_PORT:-29500}"
+echo "Using master_port for deepspeech: ${MASTER_PORT}"
+export "MASTER_ADDR"="localhost"
+export "RANK"="0"
+export "LOCAL_RANK"="0"
+export "WORLD_SIZE"="1"
+# Base model variant
+MODEL=w2v2
+# Model names and other stuff
+BASE_MODEL="facebook/mms-1b-all"
+JUST_LANG=${LANG%%_*}
+MY_MODEL="breeze-listen-${MODEL}-${JUST_LANG}"
+OUTDIR="/cosmos/home/sp-operator/ai/training/models/simpragma/${MY_MODEL}"
+echo "OUTDIR: ${OUTDIR}"
+# Training parameters you can tweak. Feel free to directly change any of the parameters below.
+MAX_EPOCHS=4
+TRAIN_BATCH_SIZE=4
+EVAL_BATCH_SIZE=4
+LEARNING_RATE="1e-3"
+EVAL_STEPS="200"
+SAVE_STEPS="200"
+# Create dir
+mkdir -p ${OUTDIR}
+#	--overwrite_output_dir \
+# If you want to resume from existing checkpoint, include the following argument as well. Modify the checkpoint directory.
+# --resume_from_checkpoint="${MY_MODEL}/checkpoint-400" \
+echo "================ TRAINING: START ================"
+python ${SCRIPT_DIR}/run_speech_recognition_ctc_adapter.py \
+	--dataset_name="${DATASET}" \
+	--model_name_or_path="${BASE_MODEL}" \
+	--dataset_config_name="${LANG}" \
+	--target_language="${LANG_ISO_3}"	\
+	--output_dir="${OUTDIR}" \
+	--num_train_epochs="${MAX_EPOCHS}" \
+	--per_device_train_batch_size="${TRAIN_BATCH_SIZE}" \
+	--learning_rate="${LEARNING_RATE}" \
+	--warmup_steps="100" \
+	--evaluation_strategy="steps" \
+	--text_column_name="${TEXT_COLUMN}" \
+	--length_column_name="input_length" \
+	--save_steps="${SAVE_STEPS}" \
+	--eval_steps="${EVAL_STEPS}" \
+	--save_total_limit="3" \
+	--optim="adamw_bnb_8bit"	\
+	--hub_model_id "simpragma/${MY_MODEL}" \
+	--gradient_checkpointing \
+	--chars_to_ignore , ? . ! - \; \: \" “ % ‘ ” � \
+	--fp16 \
+	--group_by_length \
+	--do_train 	\
+	--do_eval \
+	--push_to_hub	\
+	| tee ${OUTDIR}/${MY_MODEL}.log
+# Copy the script to the output directory so that we can recreate the model
+cp ${SCRIPT_PATH} ${OUTDIR}
+echo "================ TRAINING: DONE ================"
+exit 0

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5285fada4bc7d4d9e8126ada302ea99fc90ca747677c26a4e7da55b072d8194e
+size 4856

vocab.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "mal": {
+    "[PAD]": 71,
+    "[UNK]": 70,
+    "|": 0,
+    "ം": 1,
+    "ഃ": 2,
+    "അ": 3,
+    "ആ": 4,
+    "ഇ": 5,
+    "ഈ": 6,
+    "ഉ": 7,
+    "ഊ": 8,
+    "എ": 9,
+    "ഏ": 10,
+    "ഐ": 11,
+    "ഒ": 12,
+    "ഓ": 13,
+    "ക": 14,
+    "ഖ": 15,
+    "ഗ": 16,
+    "ഘ": 17,
+    "ങ": 18,
+    "ച": 19,
+    "ഛ": 20,
+    "ജ": 21,
+    "ഞ": 22,
+    "ട": 23,
+    "ഠ": 24,
+    "ഡ": 25,
+    "ഢ": 26,
+    "ണ": 27,
+    "ത": 28,
+    "ഥ": 29,
+    "ദ": 30,
+    "ധ": 31,
+    "ന": 32,
+    "പ": 33,
+    "ഫ": 34,
+    "ബ": 35,
+    "ഭ": 36,
+    "മ": 37,
+    "യ": 38,
+    "ര": 39,
+    "റ": 40,
+    "ല": 41,
+    "ള": 42,
+    "ഴ": 43,
+    "വ": 44,
+    "ശ": 45,
+    "ഷ": 46,
+    "സ": 47,
+    "ഹ": 48,
+    "ാ": 49,
+    "ി": 50,
+    "ീ": 51,
+    "ു": 52,
+    "ൂ": 53,
+    "ൃ": 54,
+    "െ": 55,
+    "േ": 56,
+    "ൈ": 57,
+    "ൊ": 58,
+    "ോ": 59,
+    "ൌ": 60,
+    "്": 61,
+    "ൗ": 62,
+    "ൺ": 63,
+    "ൻ": 64,
+    "ർ": 65,
+    "ൽ": 66,
+    "ൾ": 67,
+    "ൿ": 68,
+    "’": 69
+  }
+}