"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 106
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "rand_int = random.randint(0, len(common_voice_train)-1)\n",
+ "\n",
+ "print(\"Target text:\", common_voice_train[rand_int][\"sentence\"])\n",
+ "print(\"Input array shape:\", common_voice_train[rand_int][\"audio\"][\"array\"].shape)\n",
+ "print(\"Sampling rate:\", common_voice_train[rand_int][\"audio\"][\"sampling_rate\"])"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "EE5jG9PVZmWU",
+ "outputId": "53136991-5dda-4207-b3c5-8340110589fb"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Target text: pokazać ma so w tutym zwisku tež na prócowanja sakskeje\n",
+ "Input array shape: (87552,)\n",
+ "Sampling rate: 16000\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def prepare_dataset(batch):\n",
+ " audio = batch[\"audio\"]\n",
+ "\n",
+ " # batched output is \"un-batched\"\n",
+ " batch[\"input_values\"] = processor(audio[\"array\"], sampling_rate=audio[\"sampling_rate\"]).input_values[0]\n",
+ " batch[\"input_length\"] = len(batch[\"input_values\"])\n",
+ " \n",
+ " with processor.as_target_processor():\n",
+ " batch[\"labels\"] = processor(batch[\"sentence\"]).input_ids\n",
+ " return batch"
+ ],
+ "metadata": {
+ "id": "XobzwvyNZpxv"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
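+ {
+ "cell_type": "markdown",
+ "source": [
+ "Quick sanity check (added sketch, not part of the original run): applying `prepare_dataset` to a single raw example should yield the extracted `input_values`, their length, and the label ids produced by the tokenizer."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Illustrative only: process one raw training example and inspect the result.\n",
+ "example = prepare_dataset(common_voice_train[0])\n",
+ "\n",
+ "print(\"input_length:\", example[\"input_length\"])\n",
+ "print(\"first label ids:\", example[\"labels\"][:10])"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },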
+ {
+ "cell_type": "code",
+ "source": [
+ "common_voice_train = common_voice_train.map(prepare_dataset, remove_columns=common_voice_train.column_names)\n",
+ "common_voice_test = common_voice_test.map(prepare_dataset, remove_columns=common_voice_test.column_names)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 88,
+ "referenced_widgets": [
+ "33e9bcd77cdd4182a98c135118a29a0d",
+ "5d7414d13c904bf9bd3c9900d9256c8b",
+ "4ec83b8fabba46a9a55327262a11523c",
+ "ff82725e881b482b9e78199ea4469d85",
+ "8ec76c12495b402cb16dd4a400d3e185",
+ "fab742e71b68410ebe2f8c2d2074a61e",
+ "7f71a32f7395417f8eb43852737061a4",
+ "d539de5e75f647609659cc1c830b5a82",
+ "0bea115c4c1943608097893a383bc507",
+ "1fdacd9886734bfbb5ebc664829e2a8f",
+ "54a0a93cac8e4512ac213050caae0aad"
+ ]
+ },
+ "id": "A7gT1EQMZsvz",
+ "outputId": "f3b6a1e4-9f86-42a1-8ce4-482d31f03aaf"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "33e9bcd77cdd4182a98c135118a29a0d",
+ "version_minor": 0,
+ "version_major": 2
+ },
+ "text/plain": [
+ "0ex [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Loading cached processed dataset at /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/hsb/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8/cache-2397f7d3fbbc5fa8.arrow\n"
+ ]
+ }
+ ]
+ },
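+ {
+ "cell_type": "markdown",
+ "source": [
+ "Optional step (sketch only, not applied in this run): unusually long clips can be filtered out via the `input_length` column to keep GPU memory in check. The 12-second threshold below is an illustrative choice, not a value from this notebook."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# max_input_length_in_sec = 12.0\n",
+ "# common_voice_train = common_voice_train.filter(\n",
+ "#     lambda length: length < max_input_length_in_sec * processor.feature_extractor.sampling_rate,\n",
+ "#     input_columns=[\"input_length\"],\n",
+ "# )"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },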
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Training"
+ ],
+ "metadata": {
+ "id": "Hu8imi4QZx3f"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import torch\n",
+ "\n",
+ "from dataclasses import dataclass, field\n",
+ "from typing import Any, Dict, List, Optional, Union\n",
+ "\n",
+ "@dataclass\n",
+ "class DataCollatorCTCWithPadding:\n",
+ " \"\"\"\n",
+ " Data collator that will dynamically pad the inputs received.\n",
+ " Args:\n",
+ " processor (:class:`~transformers.Wav2Vec2Processor`)\n",
+ " The processor used for proccessing the data.\n",
+ " padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):\n",
+ " Select a strategy to pad the returned sequences (according to the model's padding side and padding index)\n",
+ " among:\n",
+ " * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single\n",
+ " sequence if provided).\n",
+ " * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the\n",
+ " maximum acceptable input length for the model if that argument is not provided.\n",
+ " * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of\n",
+ " different lengths).\n",
+ " \"\"\"\n",
+ "\n",
+ " processor: Wav2Vec2Processor\n",
+ " padding: Union[bool, str] = True\n",
+ "\n",
+ " def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:\n",
+ " # split inputs and labels since they have to be of different lenghts and need\n",
+ " # different padding methods\n",
+ " input_features = [{\"input_values\": feature[\"input_values\"]} for feature in features]\n",
+ " label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n",
+ "\n",
+ " batch = self.processor.pad(\n",
+ " input_features,\n",
+ " padding=self.padding,\n",
+ " return_tensors=\"pt\",\n",
+ " )\n",
+ " with self.processor.as_target_processor():\n",
+ " labels_batch = self.processor.pad(\n",
+ " label_features,\n",
+ " padding=self.padding,\n",
+ " return_tensors=\"pt\",\n",
+ " )\n",
+ "\n",
+ " # replace padding with -100 to ignore loss correctly\n",
+ " labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n",
+ "\n",
+ " batch[\"labels\"] = labels\n",
+ "\n",
+ " return batch"
+ ],
+ "metadata": {
+ "id": "rAYCcrJFZubm"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)"
+ ],
+ "metadata": {
+ "id": "k05sM3rsZzGS"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
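+ {
+ "cell_type": "markdown",
+ "source": [
+ "Small check (added sketch, assuming the mapped datasets from above): collating two processed examples shows how inputs and labels are padded into batched tensors, with label padding replaced by -100."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Illustrative only: run the collator on two examples and inspect the padded shapes.\n",
+ "sample_batch = data_collator([common_voice_train[i] for i in range(2)])\n",
+ "\n",
+ "print({k: v.shape for k, v in sample_batch.items()})"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },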
+ {
+ "cell_type": "code",
+ "source": [
+ "wer_metric = load_metric(\"wer\")"
+ ],
+ "metadata": {
+ "id": "T1zgf2ZKZ1et"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
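+ {
+ "cell_type": "markdown",
+ "source": [
+ "Toy example (added for illustration): WER is the word-level edit distance divided by the number of reference words, so a prediction missing one of four reference words scores 0.25."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Illustrative only: one deleted word out of a four-word reference -> WER = 0.25.\n",
+ "print(wer_metric.compute(predictions=[\"a b c\"], references=[\"a b c d\"]))"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },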
+ {
+ "cell_type": "code",
+ "source": [
+ "def compute_metrics(pred):\n",
+ " pred_logits = pred.predictions\n",
+ " pred_ids = np.argmax(pred_logits, axis=-1)\n",
+ "\n",
+ " pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id\n",
+ "\n",
+ " pred_str = processor.batch_decode(pred_ids)\n",
+ " # we do not want to group tokens when computing the metrics\n",
+ " label_str = processor.batch_decode(pred.label_ids, group_tokens=False)\n",
+ "\n",
+ " wer = wer_metric.compute(predictions=pred_str, references=label_str)\n",
+ "\n",
+ " return {\"wer\": wer}"
+ ],
+ "metadata": {
+ "id": "2J_feilIZ3Jf"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from transformers import Wav2Vec2ForCTC\n",
+ "\n",
+ "model = Wav2Vec2ForCTC.from_pretrained(\n",
+ " \"facebook/wav2vec2-xls-r-300m\", \n",
+ " attention_dropout=0.04,\n",
+ " hidden_dropout=0.04,\n",
+ " feat_proj_dropout=0.04,\n",
+ " mask_time_prob=0.45,\n",
+ " layerdrop=0.0,\n",
+ " ctc_loss_reduction=\"mean\", \n",
+ " pad_token_id=processor.tokenizer.pad_token_id,\n",
+ " vocab_size=len(processor.tokenizer),\n",
+ ")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "fEzH8f_lZ5FG",
+ "outputId": "c5808890-dac7-4f9c-9a16-7a4118d409b6"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n",
+ "Model config Wav2Vec2Config {\n",
+ " \"activation_dropout\": 0.0,\n",
+ " \"adapter_kernel_size\": 3,\n",
+ " \"adapter_stride\": 2,\n",
+ " \"add_adapter\": false,\n",
+ " \"apply_spec_augment\": true,\n",
+ " \"architectures\": [\n",
+ " \"Wav2Vec2ForPreTraining\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.04,\n",
+ " \"bos_token_id\": 1,\n",
+ " \"classifier_proj_size\": 256,\n",
+ " \"codevector_dim\": 768,\n",
+ " \"contrastive_logits_temperature\": 0.1,\n",
+ " \"conv_bias\": true,\n",
+ " \"conv_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512\n",
+ " ],\n",
+ " \"conv_kernel\": [\n",
+ " 10,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"conv_stride\": [\n",
+ " 5,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"ctc_loss_reduction\": \"mean\",\n",
+ " \"ctc_zero_infinity\": false,\n",
+ " \"diversity_loss_weight\": 0.1,\n",
+ " \"do_stable_layer_norm\": true,\n",
+ " \"eos_token_id\": 2,\n",
+ " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
+ " \"feat_extract_norm\": \"layer\",\n",
+ " \"feat_proj_dropout\": 0.04,\n",
+ " \"feat_quantizer_dropout\": 0.0,\n",
+ " \"final_dropout\": 0.0,\n",
+ " \"gradient_checkpointing\": false,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout\": 0.04,\n",
+ " \"hidden_size\": 1024,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4096,\n",
+ " \"layer_norm_eps\": 1e-05,\n",
+ " \"layerdrop\": 0.0,\n",
+ " \"mask_feature_length\": 10,\n",
+ " \"mask_feature_min_masks\": 0,\n",
+ " \"mask_feature_prob\": 0.0,\n",
+ " \"mask_time_length\": 10,\n",
+ " \"mask_time_min_masks\": 2,\n",
+ " \"mask_time_prob\": 0.45,\n",
+ " \"model_type\": \"wav2vec2\",\n",
+ " \"num_adapter_layers\": 3,\n",
+ " \"num_attention_heads\": 16,\n",
+ " \"num_codevector_groups\": 2,\n",
+ " \"num_codevectors_per_group\": 320,\n",
+ " \"num_conv_pos_embedding_groups\": 16,\n",
+ " \"num_conv_pos_embeddings\": 128,\n",
+ " \"num_feat_extract_layers\": 7,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_negatives\": 100,\n",
+ " \"output_hidden_size\": 1024,\n",
+ " \"pad_token_id\": 46,\n",
+ " \"proj_codevector_dim\": 768,\n",
+ " \"tdnn_dilation\": [\n",
+ " 1,\n",
+ " 2,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"tdnn_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 1500\n",
+ " ],\n",
+ " \"tdnn_kernel\": [\n",
+ " 5,\n",
+ " 3,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.16.1\",\n",
+ " \"use_weighted_layer_sum\": false,\n",
+ " \"vocab_size\": 47,\n",
+ " \"xvector_output_dim\": 512\n",
+ "}\n",
+ "\n",
+ "loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd\n",
+ "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['project_hid.bias', 'project_q.bias', 'quantizer.codevectors', 'project_q.weight', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_hid.weight']\n",
+ "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "model.freeze_feature_extractor()"
+ ],
+ "metadata": {
+ "id": "h7MYlVI1Z-w8",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "7980d864-3a23-45cc-fd0e-2b5bc0a16a80"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.7/dist-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:1703: FutureWarning: The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5.Please use the equivalent `freeze_feature_encoder` method instead.\n",
+ " FutureWarning,\n"
+ ]
+ }
+ ]
+ },
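+ {
+ "cell_type": "markdown",
+ "source": [
+ "Quick check (added sketch): freezing the feature encoder turns off gradients for the convolutional front end, which the parameter counts below make visible."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Illustrative only: compare trainable vs. total parameter counts after freezing.\n",
+ "trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
+ "total = sum(p.numel() for p in model.parameters())\n",
+ "print(f\"trainable: {trainable:,} / total: {total:,}\")"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },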
+ {
+ "cell_type": "code",
+ "source": [
+ "from transformers import TrainingArguments\n",
+ "\n",
+ "training_args = TrainingArguments(\n",
+ " output_dir=repo_name,\n",
+ " group_by_length=True,\n",
+ " per_device_train_batch_size=16,\n",
+ " gradient_accumulation_steps=2,\n",
+ " evaluation_strategy=\"steps\",\n",
+ " num_train_epochs=50,\n",
+ " gradient_checkpointing=True,\n",
+ " fp16=True,\n",
+ " save_steps=100,\n",
+ " eval_steps=100,\n",
+ " logging_steps=100,\n",
+ " learning_rate=4.5e-4,\n",
+ " warmup_steps=500,\n",
+ " save_total_limit=2,\n",
+ " push_to_hub=True,\n",
+ ")"
+ ],
+ "metadata": {
+ "id": "DapYDcW4Z_Z0",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "5277a6ae-bccb-4665-99e3-34470b0e087b"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "PyTorch: setting up devices\n",
+ "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
+ ]
+ }
+ ]
+ },
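+ {
+ "cell_type": "markdown",
+ "source": [
+ "Back-of-the-envelope check (added for clarity): a per-device batch size of 16 with 2 gradient-accumulation steps on a single GPU gives an effective batch size of 32, so the 980 training examples yield 31 optimization steps per epoch and 31 × 50 = 1550 steps in total, matching the training log below."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import math\n",
+ "\n",
+ "# Illustrative only: reproduce the step count reported by the Trainer.\n",
+ "effective_batch = 16 * 2  # per_device_train_batch_size * gradient_accumulation_steps\n",
+ "steps_per_epoch = math.ceil(len(common_voice_train) / effective_batch)\n",
+ "print(effective_batch, steps_per_epoch, steps_per_epoch * 50)"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },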
+ {
+ "cell_type": "code",
+ "source": [
+ "from transformers import Trainer\n",
+ "\n",
+ "trainer = Trainer(\n",
+ " model=model,\n",
+ " data_collator=data_collator,\n",
+ " args=training_args,\n",
+ " compute_metrics=compute_metrics,\n",
+ " train_dataset=common_voice_train,\n",
+ " eval_dataset=common_voice_test,\n",
+ " tokenizer=processor.feature_extractor,\n",
+ ")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "jHiXuohDaD_5",
+ "outputId": "5e9a9de1-968a-47b3-b544-687cfa9923fb"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/content/wav2vec2-large-xls-r-300m-hsb-v1 is already a clone of https://huggingface.co/DrishtiSharma/wav2vec2-large-xls-r-300m-hsb-v1. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+ "Using amp half precision backend\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "trainer.train()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "27TZ79d_aIgO",
+ "outputId": "25b28e4c-0ef6-48fc-f6c5-0e849b63be37"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ " FutureWarning,\n",
+ "***** Running training *****\n",
+ " Num examples = 980\n",
+ " Num Epochs = 50\n",
+ " Instantaneous batch size per device = 16\n",
+ " Total train batch size (w. parallel, distributed & accumulation) = 32\n",
+ " Gradient Accumulation steps = 2\n",
+ " Total optimization steps = 1550\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [1550/1550 3:17:32, Epoch 50/50]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step | \n",
+ " Training Loss | \n",
+ " Validation Loss | \n",
+ " Wer | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 100 | \n",
+ " 8.972000 | \n",
+ " 3.749781 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 200 | \n",
+ " 3.340100 | \n",
+ " 3.232006 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 300 | \n",
+ " 3.204600 | \n",
+ " 3.174072 | \n",
+ " 0.980611 | \n",
+ "
\n",
+ " \n",
+ " 400 | \n",
+ " 2.403100 | \n",
+ " 1.057930 | \n",
+ " 0.899577 | \n",
+ "
\n",
+ " \n",
+ " 500 | \n",
+ " 1.042700 | \n",
+ " 0.798919 | \n",
+ " 0.755655 | \n",
+ "
\n",
+ " \n",
+ " 600 | \n",
+ " 0.741000 | \n",
+ " 0.640519 | \n",
+ " 0.629878 | \n",
+ "
\n",
+ " \n",
+ " 700 | \n",
+ " 0.569900 | \n",
+ " 0.612935 | \n",
+ " 0.592841 | \n",
+ "
\n",
+ " \n",
+ " 800 | \n",
+ " 0.460700 | \n",
+ " 0.654812 | \n",
+ " 0.569476 | \n",
+ "
\n",
+ " \n",
+ " 900 | \n",
+ " 0.382700 | \n",
+ " 0.626791 | \n",
+ " 0.519016 | \n",
+ "
\n",
+ " \n",
+ " 1000 | \n",
+ " 0.328200 | \n",
+ " 0.591850 | \n",
+ " 0.501616 | \n",
+ "
\n",
+ " \n",
+ " 1100 | \n",
+ " 0.276400 | \n",
+ " 0.595303 | \n",
+ " 0.480487 | \n",
+ "
\n",
+ " \n",
+ " 1200 | \n",
+ " 0.233500 | \n",
+ " 0.571745 | \n",
+ " 0.472782 | \n",
+ "
\n",
+ " \n",
+ " 1300 | \n",
+ " 0.210600 | \n",
+ " 0.567448 | \n",
+ " 0.456873 | \n",
+ "
\n",
+ " \n",
+ " 1400 | \n",
+ " 0.185900 | \n",
+ " 0.568466 | \n",
+ " 0.450162 | \n",
+ "
\n",
+ " \n",
+ " 1500 | \n",
+ " 0.159200 | \n",
+ " 0.568352 | \n",
+ " 0.440219 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100/preprocessor_config.json\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/preprocessor_config.json\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200/preprocessor_config.json\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-100] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-200] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-300] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-400] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-500] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-600] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-700] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-800] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-900] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1000] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1100] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1400\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1400/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1400/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1400/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1200] due to args.save_total_limit\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 418\n",
+ " Batch size = 8\n",
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1500\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1500/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1500/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1500/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-hsb-v1/checkpoint-1300] due to args.save_total_limit\n",
+ "\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=1550, training_loss=1.4571432159792992, metrics={'train_runtime': 11862.6013, 'train_samples_per_second': 4.131, 'train_steps_per_second': 0.131, 'total_flos': 1.0091977567651906e+19, 'train_loss': 1.4571432159792992, 'epoch': 50.0})"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 118
+ }
+ ]
+ },
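+ {
+ "cell_type": "markdown",
+ "source": [
+ "Optional qualitative check (added sketch, not part of the original run): greedy CTC decoding of one test clip with the fine-tuned model."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import torch\n",
+ "\n",
+ "# Illustrative only: forward one processed test example and decode with argmax.\n",
+ "model.eval()\n",
+ "with torch.no_grad():\n",
+ "    input_values = torch.tensor(common_voice_test[0][\"input_values\"]).unsqueeze(0).to(model.device)\n",
+ "    logits = model(input_values).logits\n",
+ "\n",
+ "pred_ids = torch.argmax(logits, dim=-1)\n",
+ "print(\"Prediction:\", processor.batch_decode(pred_ids)[0])"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },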
+ {
+ "cell_type": "code",
+ "source": [
+ "trainer.push_to_hub()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 356,
+ "referenced_widgets": [
+ "7002a26a8625455682af2d92b4f4f3b3",
+ "b5c702735ce84403ad67216c3cb872a9",
+ "c0522846dd85473590e9e3fd2c522335",
+ "84909a89eb6140339979ad3e58a708d1",
+ "44108700a6bd4789b1ad62a01263bb11",
+ "a8f0555374264ba79f38d71d91f5ff6f",
+ "51846bf6f7b443d88f6c884df9bcfe12",
+ "0dabd4f119d34a8dafeefc125c46e52d",
+ "0fd787cdff1a4096826d10efa3676aca",
+ "55d494df8f2d48d1ae4654d0833afedc",
+ "b4840ffc785d41c7b193cdd7c0055191",
+ "c1c8855ab8df474ea1e4bd078060f27b",
+ "829a4957c2ee4795a1ae71015a7b3393",
+ "9f09da2d4fef4a08a8bb0cfdbd3ded58",
+ "b244af9b23894dcd8d58624fc00e2f9d",
+ "16b8dadd139f4a52b489c84be1f9942b",
+ "69ac19f932d3483dabdf8a1c1b5d1554",
+ "4f8c1af0353942d9848378e0b79d6455",
+ "70bd992691ea4075b98890b6e9f2e290",
+ "6c27d46d0c604325bde1e84735264896",
+ "4ada050cc71a43a8ad80cd6a07483ff7",
+ "4496fd5850ef4f32984f88959414f9bf"
+ ]
+ },
+ "id": "qKKeV9gEaLHr",
+ "outputId": "26794641-f90d-4f24-81a1-8266ab1e9559"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Saving model checkpoint to wav2vec2-large-xls-r-300m-hsb-v1\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/config.json\n",
+ "Model weights saved in wav2vec2-large-xls-r-300m-hsb-v1/pytorch_model.bin\n",
+ "Configuration saved in wav2vec2-large-xls-r-300m-hsb-v1/preprocessor_config.json\n",
+ "Several commits (2) will be pushed upstream.\n",
+ "The progress bars may be unreliable.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "7002a26a8625455682af2d92b4f4f3b3",
+ "version_minor": 0,
+ "version_major": 2
+ },
+ "text/plain": [
+ "Upload file pytorch_model.bin: 0%| | 3.36k/1.18G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c1c8855ab8df474ea1e4bd078060f27b",
+ "version_minor": 0,
+ "version_major": 2
+ },
+ "text/plain": [
+ "Upload file runs/Jan31_00-26-59_70b8475e937e/events.out.tfevents.1643588864.70b8475e937e.72.2: 28%|##8 …"
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "To https://huggingface.co/DrishtiSharma/wav2vec2-large-xls-r-300m-hsb-v1\n",
+ " 45a5465..d251b08 main -> main\n",
+ "\n",
+ "Dropping the following result as it does not have all the necessary fields:\n",
+ "{'dataset': {'name': 'common_voice', 'type': 'common_voice', 'args': 'hsb'}}\n",
+ "To https://huggingface.co/DrishtiSharma/wav2vec2-large-xls-r-300m-hsb-v1\n",
+ " d251b08..f6c95b2 main -> main\n",
+ "\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ },
+ "text/plain": [
+ "'https://huggingface.co/DrishtiSharma/wav2vec2-large-xls-r-300m-hsb-v1/commit/d251b08a878f40381635bc9d1653edb51bb97ad4'"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 119
+ }
+ ]
+ },
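+ {
+ "cell_type": "markdown",
+ "source": [
+ "Usage sketch (hypothetical, the audio path is a placeholder): once pushed, the checkpoint can be loaded straight from the Hub with the ASR pipeline."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# from transformers import pipeline\n",
+ "#\n",
+ "# asr = pipeline(\"automatic-speech-recognition\", model=\"DrishtiSharma/wav2vec2-large-xls-r-300m-hsb-v1\")\n",
+ "# asr(\"path/to/audio.wav\")  # -> {\"text\": \"...\"}"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },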
+ {
+ "cell_type": "code",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "vMeJybu4aPpa"
+ },
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file