Upload prompt_tune_phi3.ipynb with huggingface_hub

prompt_tune_phi3.ipynb (CHANGED: +122 -71)
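The commit was created with `huggingface_hub` rather than through the web UI. For orientation, here is a minimal sketch of the kind of call that produces such a commit; the exact invocation is not part of this diff, and the repo id is taken from the `CommitInfo` output further down:

```python
# Hypothetical reconstruction of the upload step; only its CommitInfo result
# is visible in the notebook outputs below.
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="prompt_tune_phi3.ipynb",  # local notebook file
    path_in_repo="prompt_tune_phi3.ipynb",     # destination path in the repo
    repo_id="Granther/prompt-tuned-phi3",      # repo id from the commit URL below
    commit_message="Upload prompt_tune_phi3.ipynb with huggingface_hub",
)
```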
@@ -36,13 +36,13 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 3,
   "id": "f1cc378f-afb6-441f-a4c6-2ec427b4cd4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup\n",
-   "from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType\n",
+   "from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType, PeftConfig\n",
    "import torch\n",
    "from datasets import load_dataset\n",
    "import os\n",
@@ -54,27 +54,42 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 17,
   "id": "e4ab50d7-a4c9-4246-acd8-8875b87fe0da",
   "metadata": {},
-  "outputs": [
+  "outputs": [
+   {
+    "data": {
+     "application/vnd.jupyter.widget-view+json": {
+      "model_id": "7f03fcf3844743fcb41f8bfc9c6c9b70",
+      "version_major": 2,
+      "version_minor": 0
+     },
+     "text/plain": [
+      "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
+     ]
+    },
+    "metadata": {},
+    "output_type": "display_data"
+   }
+  ],
   "source": [
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 3,
   "id": "8a1cb1f9-b89d-4cac-a595-44e1e0ef85b2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-      "CommitInfo(commit_url='https://huggingface.co/Granther/prompt-tuned-phi3/commit/
+      "CommitInfo(commit_url='https://huggingface.co/Granther/prompt-tuned-phi3/commit/ab5911db092a8e53ea24c33f170e8013a8b172aa', commit_message='Upload prompt_tune_phi3.ipynb with huggingface_hub', commit_description='', oid='ab5911db092a8e53ea24c33f170e8013a8b172aa', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
-    "execution_count":
+    "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
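The login cell calls `notebook_login()` without a visible import; it presumably comes from `huggingface_hub`, whose helper renders the token widget captured in the new output above:

```python
# Assumed import for the cell above; notebook_login is a huggingface_hub helper
# that displays the interactive token prompt (the VBox widget in the output).
from huggingface_hub import notebook_login

notebook_login()
```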
@@ -90,7 +105,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 4,
   "id": "6cad1e5c-038f-4e75-8c3f-8ce0a43713a4",
   "metadata": {},
   "outputs": [],
@@ -103,7 +118,7 @@
    " peft_type=PeftType.PROMPT_TUNING, # what kind of peft\n",
    " task_type=TaskType.CAUSAL_LM, # config task\n",
    " prompt_tuning_init=PromptTuningInit.TEXT, # Set to 'TEXT' to use prompt_tuning_init_text\n",
-   " num_virtual_tokens=
+   " num_virtual_tokens=100, # x times the number of hidden transformer layers\n",
    " prompt_tuning_init_text=\"Classify if the tweet is a complaint or not:\",\n",
    " tokenizer_name_or_path=model_id\n",
    ")\n",
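Reassembling the fragments in this hunk, the cell is the standard PEFT prompt-tuning setup, with the change bumping `num_virtual_tokens` to 100 (the old value is truncated in this view). A sketch using the values visible in the diff; `model_id` is defined earlier in the notebook, so its exact value here is an assumption:

```python
# Sketch of the full config cell; every field except model_id appears in the diff.
from peft import PromptTuningConfig, PromptTuningInit, PeftType, TaskType

model_id = "microsoft/Phi-3-mini-4k-instruct"  # assumption: some Phi-3 checkpoint

peft_config = PromptTuningConfig(
    peft_type=PeftType.PROMPT_TUNING,          # what kind of peft
    task_type=TaskType.CAUSAL_LM,              # config task
    prompt_tuning_init=PromptTuningInit.TEXT,  # 'TEXT' enables prompt_tuning_init_text
    num_virtual_tokens=100,                    # 100 trainable soft-prompt tokens
    prompt_tuning_init_text="Classify if the tweet is a complaint or not:",
    tokenizer_name_or_path=model_id,
)
```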
@@ -123,7 +138,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 5,
   "id": "6f677839-ef23-428a-bcfe-f596590804ca",
   "metadata": {},
   "outputs": [],
@@ -133,7 +148,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 11,
   "id": "c0c05613-7941-4959-ada9-49ed1093bec4",
   "metadata": {},
   "outputs": [
@@ -143,7 +158,7 @@
      "['Unlabeled', 'complaint', 'no complaint']"
     ]
    },
-   "execution_count":
+   "execution_count": 11,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -155,24 +170,10 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 7,
   "id": "14e2bc8b-b4e3-49c9-ae2b-5946e412caa5",
   "metadata": {},
   "outputs": [
-   {
-    "data": {
-     "application/vnd.jupyter.widget-view+json": {
-      "model_id": "11da1eb81527428a95c41816f5bf459f",
-      "version_major": 2,
-      "version_minor": 0
-     },
-     "text/plain": [
-      "Map (num_proc=10):   0%|          | 0/3399 [00:00<?, ? examples/s]"
-     ]
-    },
-    "metadata": {},
-    "output_type": "display_data"
-   },
    {
     "data": {
     "text/plain": [
@@ -182,7 +183,7 @@
      " 'text_label': 'no complaint'}"
     ]
    },
-   "execution_count":
+   "execution_count": 7,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -201,7 +202,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 8,
   "id": "19f0865d-e490-4c9f-a5f4-e781ed270f47",
   "metadata": {},
   "outputs": [
@@ -218,7 +219,7 @@
      "[1, 853, 29880, 24025]"
     ]
    },
-   "execution_count":
+   "execution_count": 8,
    "metadata": {},
    "output_type": "execute_result"
   }
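Two outputs in these hunks anchor the data pipeline: the class list `['Unlabeled', 'complaint', 'no complaint']` with its `text_label` field, and `[1, 853, 29880, 24025]`, which looks like the label `'Unlabeled'` run through the tokenizer (a BOS token plus three subword ids). Both match the RAFT `twitter_complaints` recipe from the PEFT prompt-tuning tutorial, which this notebook appears to follow; a hedged sketch of that preprocessing (dataset id assumed):

```python
# Assumed preprocessing; the class list and text_label field match the outputs above.
from datasets import load_dataset

dataset = load_dataset("ought/raft", "twitter_complaints")  # assumption
classes = [k.replace("_", " ") for k in dataset["train"].features["Label"].names]
print(classes)  # ['Unlabeled', 'complaint', 'no complaint']

# Map the integer Label column to its text form, as in the 'text_label' record above.
dataset = dataset.map(
    lambda batch: {"text_label": [classes[label] for label in batch["Label"]]},
    batched=True,
    num_proc=10,  # matches the "Map (num_proc=10)" bar in the removed output above
)
```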
@@ -250,7 +251,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 14,
   "id": "03f05467-dce3-4e42-ab3b-c39ba620e164",
   "metadata": {},
   "outputs": [],
@@ -291,14 +292,14 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 15,
   "id": "72ddca5f-7bce-4342-9414-9dd9d41d9dec",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-      "model_id": "
+      "model_id": "5494bc1fbce24646b61e60e119ae1cb2",
       "version_major": 2,
       "version_minor": 0
      },
@@ -312,7 +313,7 @@
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-      "model_id": "
+      "model_id": "857675d314254672964cafc522e3869f",
       "version_major": 2,
       "version_minor": 0
      },
@@ -337,7 +338,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 16,
   "id": "40cea6bc-e898-4d86-a6bf-5afc3a647e07",
   "metadata": {},
   "outputs": [],
@@ -362,14 +363,21 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 17,
   "id": "a4c529e4-d8ae-42b2-a658-f76d183bb264",
   "metadata": {},
   "outputs": [
+   {
+    "name": "stderr",
+    "output_type": "stream",
+    "text": [
+     "You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.\n"
+    ]
+   },
    {
     "data": {
     "application/vnd.jupyter.widget-view+json": {
-     "model_id": "
+     "model_id": "1d09f75f23894968a6acd482a53fc92b",
      "version_major": 2,
      "version_minor": 0
     },
@@ -391,7 +399,7 @@
    "name": "stdout",
    "output_type": "stream",
    "text": [
-    "trainable params:
+    "trainable params: 307,200 || all params: 3,821,386,752 || trainable%: 0.0080\n",
     "None\n"
    ]
   }
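The `trainable params: 307,200` line is a quick sanity check on the prompt-tuning setup: only the soft prompt is trained, and 100 virtual tokens times a hidden size of 3072 gives exactly 307,200 parameters (the hidden size is an assumption based on the Phi-3-mini family; the totals come from the diff). The trailing `None` suggests the cell wrapped `print_trainable_parameters()`, which returns `None`, in another `print`:

```python
# Arithmetic check of the printed parameter counts (hidden_size assumed for Phi-3-mini).
num_virtual_tokens = 100
hidden_size = 3072                      # assumption: Phi-3-mini hidden dimension
trainable = num_virtual_tokens * hidden_size
print(trainable)                        # 307200, matching "trainable params: 307,200"
print(trainable / 3_821_386_752 * 100)  # ~0.008, matching "trainable%: 0.0080"
```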
@@ -406,7 +414,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 18,
   "id": "3289e4e3-9b9a-4256-921b-5df21d18344e",
   "metadata": {},
   "outputs": [],
@@ -421,7 +429,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 19,
   "id": "e7939d75-c6b9-47a8-b1a3-88f7c33ff121",
   "metadata": {},
   "outputs": [
@@ -429,8 +437,9 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "
-    "100%|██████████|
+    "  0%|          | 0/7 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)\n",
+    "100%|██████████| 7/7 [00:01<00:00,  5.36it/s]\n",
+    "100%|██████████| 425/425 [00:29<00:00, 14.23it/s]\n"
    ]
   },
   {
@@ -444,8 +453,8 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 7/7 [00:00<00:00,
-    "100%|██████████| 425/425 [00:
+    "100%|██████████| 7/7 [00:00<00:00,  7.66it/s]\n",
+    "100%|██████████| 425/425 [00:29<00:00, 14.26it/s]\n"
    ]
   },
   {
@@ -459,8 +468,8 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 7/7 [00:00<00:00,
-    "100%|██████████| 425/425 [00:
+    "100%|██████████| 7/7 [00:00<00:00,  7.76it/s]\n",
+    "100%|██████████| 425/425 [00:29<00:00, 14.25it/s]\n"
    ]
   },
   {
@@ -474,8 +483,8 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 7/7 [00:00<00:00,
-    "100%|██████████| 425/425 [00:
+    "100%|██████████| 7/7 [00:00<00:00,  7.72it/s]\n",
+    "100%|██████████| 425/425 [00:29<00:00, 14.24it/s]\n"
    ]
   },
   {
@@ -489,8 +498,8 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "100%|██████████| 7/7 [00:00<00:00,
-    "100%|██████████| 425/425 [00:
+    "100%|██████████| 7/7 [00:00<00:00,  7.77it/s]\n",
+    "100%|██████████| 425/425 [00:29<00:00, 14.18it/s]"
    ]
   },
   {
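The five pairs of `7/7` and `425/425` bars look like a five-epoch loop over a small train split and a larger evaluation split, consistent with `default_data_collator` and `get_linear_schedule_with_warmup` being imported at the top of the notebook. A hedged sketch of the usual PEFT prompt-tuning loop this resembles; `model`, `device`, and the dataloaders come from earlier cells not shown in the diff, and the learning rate and epoch count are assumptions:

```python
# Sketch of the presumed training loop; names below come from the imports and
# progress bars in the diff, not from visible source cells.
import torch
from tqdm import tqdm
from transformers import get_linear_schedule_with_warmup

num_epochs = 5  # assumption: five bar pairs appear in the outputs
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-2)  # assumed lr
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=len(train_dataloader) * num_epochs,
)

for epoch in range(num_epochs):
    model.train()
    for batch in tqdm(train_dataloader):        # the 7/7 bars
        batch = {k: v.to(device) for k, v in batch.items()}
        loss = model(**batch).loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    model.eval()
    with torch.no_grad():
        for batch in tqdm(eval_dataloader):     # the 425/425 bars
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
```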
@@ -546,7 +555,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
+  "execution_count": 20,
   "id": "806d36f8-499e-4af8-b717-68e5d849866d",
   "metadata": {},
   "outputs": [],
@@ -556,14 +565,14 @@
  },
  {
   "cell_type": "code",
-  "execution_count":
-  "id": "
+  "execution_count": 10,
+  "id": "cff41965-fa71-420b-80d8-ce597510f1d3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
-      "model_id": "
+      "model_id": "821777d6daa442c7a5779f3aff695739",
       "version_major": 2,
       "version_minor": 0
      },
@@ -573,48 +582,90 @@
     },
     "metadata": {},
     "output_type": "display_data"
-   },
-   {
-    "name": "stderr",
-    "output_type": "stream",
-    "text": [
-     "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
-    ]
    }
   ],
   "source": [
-   "from
-   "
-   "
+   "from peft import PeftModel, PeftConfig\n",
+   "from transformers import AutoModelForCausalLM, AutoTokenizer \n",
+   "\n",
+   "#tokenizer = AutoTokenizer.from_pretrained('model')\n",
+   "\n",
+   "config = PeftConfig.from_pretrained('model')\n",
+   "model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)\n",
+   "model = PeftModel.from_pretrained(model, 'model')"
   ]
  },
  {
   "cell_type": "code",
-  "execution_count":
-  "id": "
+  "execution_count": 11,
+  "id": "d8a432c9-9ddb-4bb7-a7f0-c4cadd612535",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "inputs = tokenizer(\n",
+   "    f'{text_col} : {\"@nationalgridus I have no water and the bill is current and paid. Can you do something about this?\"} Label : ',\n",
+   "    return_tensors=\"pt\",\n",
+   ")"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 15,
+  "id": "66cfaab3-dc63-4a1e-ab4d-2a687695993d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "
-     "
+     "/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1249: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
+     "  warnings.warn(\n"
     ]
    },
+   {
+    "ename": "ValueError",
+    "evalue": "Input length of input_ids is 32, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.",
+    "output_type": "error",
+    "traceback": [
+     "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+     "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+     "Cell \u001b[0;32mIn[15], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[1;32m 4\u001b[0m inputs \u001b[38;5;241m=\u001b[39m {k: v\u001b[38;5;241m.\u001b[39mto(device) \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m inputs\u001b[38;5;241m.\u001b[39mitems()}\n\u001b[0;32m----> 5\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minput_ids\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mattention_mask\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
+     "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/peft/peft_model.py:1493\u001b[0m, in \u001b[0;36mPeftModelForCausalLM.generate\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1491\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_model\u001b[38;5;241m.\u001b[39mgenerate(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1492\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1493\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1494\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m:\n\u001b[1;32m 1495\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_model\u001b[38;5;241m.\u001b[39mprepare_inputs_for_generation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_model_prepare_inputs_for_generation\n",
+     "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+     "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1786\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m 1783\u001b[0m model_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpast_key_values\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m DynamicCache\u001b[38;5;241m.\u001b[39mfrom_legacy_cache(past)\n\u001b[1;32m 1784\u001b[0m use_dynamic_cache_by_default \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m-> 1786\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_generated_length\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_ids_length\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhas_default_max_length\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1788\u001b[0m \u001b[38;5;66;03m# 7. determine generation mode\u001b[39;00m\n\u001b[1;32m 1789\u001b[0m generation_mode \u001b[38;5;241m=\u001b[39m generation_config\u001b[38;5;241m.\u001b[39mget_generation_mode(assistant_model)\n",
+     "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1257\u001b[0m, in \u001b[0;36mGenerationMixin._validate_generated_length\u001b[0;34m(self, generation_config, input_ids_length, has_default_max_length)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m input_ids_length \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m generation_config\u001b[38;5;241m.\u001b[39mmax_length:\n\u001b[1;32m 1256\u001b[0m input_ids_string \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdecoder_input_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1257\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1258\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInput length of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00minput_ids_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is \u001b[39m\u001b[38;5;132;01m{\u001b[39;00minput_ids_length\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, but `max_length` is set to\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1259\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mgeneration_config\u001b[38;5;241m.\u001b[39mmax_length\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. This can lead to unexpected behavior. You should consider\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1260\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m increasing `max_length` or, better yet, setting `max_new_tokens`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1261\u001b[0m )\n\u001b[1;32m 1263\u001b[0m \u001b[38;5;66;03m# 2. Min length warnings due to unfeasible parameter combinations\u001b[39;00m\n\u001b[1;32m 1264\u001b[0m min_length_error_suffix \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1265\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m Generation will stop at the defined maximum length. You should decrease the minimum length and/or \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1266\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mincrease the maximum length.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1267\u001b[0m )\n",
+     "\u001b[0;31mValueError\u001b[0m: Input length of input_ids is 32, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`."
+    ]
+   }
+  ],
+  "source": [
+   "model.to(device)\n",
+   "\n",
+   "with torch.no_grad():\n",
+   "    inputs = {k: v.to(device) for k, v in inputs.items()}\n",
+   "    out = model.generate(input_ids=inputs[\"input_ids\"], attention_mask=inputs[\"attention_mask\"])#, max_new_tokens=10) #, eos_token_id=3)\n",
+   "    #print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 24,
+  "id": "26438301-3601-44f4-bbe4-3c573a1c28be",
+  "metadata": {},
+  "outputs": [
    {
     "data": {
      "text/plain": [
-      "[{'generated_text':
+      "[{'generated_text': '@HMRCcustomers No this is my first job and I am not sure what to do. I have been told that I need to register with HMRC but I am not sure how to do this. Can you please help me?\\n\\n### response\\nTo register with HMRC for your first job, you need to complete a Self Assessment tax return if you are self-employed or have income to report. For employees, you may need to complete'}]"
      ]
     },
-    "execution_count":
+    "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-   "pipe(\"@
+   "pipe(\"@HMRCcustomers No this is my first job\")"
   ]
  },
  {