# ---
# jupyter:
#   kernelspec:
#     display_name: Python 3
#     name: python3
# ---

# %% [markdown]
# This notebook is a Stable Diffusion tool that lets you find similar tokens in the SD 1.5 vocab.json, which you can then use for text-to-image generation. Try this free online SD 1.5 generator with the results: https://perchance.org/fusion-ai-image-generator
#
# Scroll to the bottom of the notebook to see the guide for how this works.

# %%
# @title ✳️ Load/initialize values
import json
import os
import subprocess

import pandas as pd  # no longer used by this cell; kept so any other cell relying on `pd` keeps working
import torch
from torch import linalg as LA

# Load the tokens into the colab.
SD_TOKENS_REPO = 'https://huggingface.co/datasets/codeShare/sd_tokens'
SD_TOKENS_DIR = '/content/sd_tokens'

# Clone into an explicit directory and only when it is missing. A bare
# `git clone` followed by `cd` is not re-runnable: on the second run the
# working directory is already the checkout, so the repo would be cloned
# again inside itself.
if not os.path.isdir(SD_TOKENS_DIR):
    subprocess.run(['git', 'clone', SD_TOKENS_REPO, SD_TOKENS_DIR], check=True)
os.chdir(SD_TOKENS_DIR)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
token = torch.load('sd15_tensors.pt', map_location=device, weights_only=True)
#-----#

# Import the vocab.json
with open('vocab.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# vocab.json maps token text -> ID; invert it so that vocab[ID] gives the token text.
vocab = {token_id: text for text, token_id in data.items()}
#-----#

# Define functions/constants
# NOTE(review): every scan in this notebook iterates range(NUM_TOKENS), i.e. IDs
# 0..49406, so ID 49407 (the last ID in the tokenizer output shown below) is
# never compared against -- confirm this is intended.
NUM_TOKENS = 49407


def absolute_value(x):
    """Return the absolute value of `x`."""
    return abs(x)


def token_similarity(A, B):
    """Return the cosine similarity of two 1-D tensors as a percentage string.

    The value is cosine(A, B) * 100 rounded to 3 decimals, formatted like
    '12.345 %'. The sign is kept (no absolute value is taken).
    """
    # cosine_similarity clamps the norms with a small eps, so an all-zero
    # vector gives 0 instead of nan.
    cosine = torch.nn.functional.cosine_similarity(A, B, dim=0)
    return f'{round(cosine.item() * 100, 3)} %'


def similarity(id_A, id_B):
    """Return `token_similarity` for the token tensors stored at IDs `id_A` and `id_B`."""
    return token_similarity(token[id_A], token[id_B])
#----#

#print(vocab[8922]) #the vocab item for ID 8922
#print(token[8922].shape) #dimension of the token

mix_with = ""
mix_method = "None"

#-------------#
# UNUSED

def _extreme_indices_str(A, descending, count=10):
    """Format the positions of the `count` largest/smallest entries of 1-D tensor `A` as '{i,j,...}'."""
    _, order = torch.sort(A, dim=0, descending=descending)
    # Slicing (instead of indexing 0..9) also copes with tensors shorter than `count`.
    return "{" + ",".join(str(position) for position in order[:count].tolist()) + "}"


# Get the indices of the 10 lowest values from a tensor as a string
def get_valleys(A):
    """Return the indices of the 10 lowest values of `A`, formatted as '{i,j,...}'."""
    return _extreme_indices_str(A, descending=False)


# Get the indices of the 10 highest values from a tensor as a string
def get_peaks(A):
    """Return the indices of the 10 highest values of `A`, formatted as '{i,j,...}'."""
    return _extreme_indices_str(A, descending=True)

# %%
# @title ⚡ Get similiar tokens
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14", clean_up_tokenization_spaces = False)

# @markdown Write name of token to match against
prompt= "banana" # @param {type:'string',"placeholder":"leave empty for random value token"}

tokenizer_output = tokenizer(text = prompt)
input_ids = tokenizer_output['input_ids']
print(input_ids)

# The prompt will be enclosed with the <|start-of-text|> and <|end-of-text|> tokens, which is why output will be [49406, ... , 49407].

# You can leave the 'prompt' field empty to get a random value tensor.
# Since the tensor is random valued, it will not correspond to any tensor in the vocab.json list, and thus it will have no ID.

# Only the token right after the start-of-text token (index 1) is used.
id_A = input_ids[1]
A = token[id_A]
_A = LA.vector_norm(A, ord=2)

# If no input exists we just randomize the entire thing
if prompt == "":
    id_A = -1
    print("Tokenized prompt tensor A is a random valued tensor with no ID")
    # rand_like keeps the shape, dtype and device of the token tensors; a plain
    # torch.rand(768) lives on the CPU and cannot be combined with `token`
    # when that was loaded onto CUDA.
    R = torch.rand_like(A)
    _R = LA.vector_norm(R, ord=2)
    A = R*(_A/_R)  # rescale the noise to the norm of the token it replaces

# @markdown (optional) Mix the token with something else
mix_with = "" # @param {"type":"string","placeholder":"leave empty for random value token"}
mix_method = "None" # @param ["None" , "Average", "Subtract"] {allow-input: true}
w = 0.5 # @param {type:"slider", min:0, max:1, step:0.01}

# @markdown Limit char size of included token
# NOTE(review): these two values are not applied anywhere in this cell.
min_char_size = 3 # @param {type:"slider", min:0, max: 50, step:1}
char_range = 5 # @param {type:"slider", min:0, max: 50, step:1}

# Tokenize into a separate name so the prompt's `input_ids` are not clobbered.
mix_input_ids = tokenizer(text = mix_with)['input_ids']
id_C = mix_input_ids[1]
C = token[id_C]
_C = LA.vector_norm(C, ord=2)

# If no input exists we just randomize the entire thing
if mix_with == "":
    id_C = -1
    print("Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID")
    R = torch.rand_like(C)
    _R = LA.vector_norm(R, ord=2)
    C = R*(_C/_R)

# An ID of -1 marks a random tensor that has no vocab entry.
name_A = vocab[id_A] if id_A > -1 else "A of random type"
name_C = vocab[id_C] if id_C > -1 else "token C of random type"

print(f"The similarity between A '{name_A}' and C '{name_C}' is {token_similarity(A, C)}")

if mix_method == "None":
    print("No operation")
elif mix_method == "Average":
    A = w*A + (1-w)*C
    _A = LA.vector_norm(A, ord=2)
    print(f"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = w*A + (1-w)*C , where C is '{name_C}' token , for w = {w} ")
elif mix_method == "Subtract":
    # Weighted difference, rescaled back to the original norm of A.
    tmp = w*A - (1-w)*C
    _tmp = LA.vector_norm(tmp, ord=2)
    A = (_A/_tmp)*tmp
    _A = LA.vector_norm(A, ord=2)
    print(f"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = _A*norm(w*A - (1-w)*C) , where C is '{name_C}' token , for w = {w} ")

# Cosine similarity between A and the first NUM_TOKENS token tensors, computed
# in one vectorized call instead of a Python loop over every ID.
dots = torch.nn.functional.cosine_similarity(token[:NUM_TOKENS], A.unsqueeze(0), dim=1).cpu()

# Named `sorted_dots` so the builtin `sorted` is not shadowed for the rest of the session.
sorted_dots, indices = torch.sort(dots, dim=0, descending=True)
#----#
if mix_method == "Average":
    print(f'Calculated all cosine-similarities between the average of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted_dots.shape[0]} tensor')
if mix_method == "Subtract":
    print(f'Calculated all cosine-similarities between the subtract of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted_dots.shape[0]} tensor')
if mix_method == "None":
    print(f'Calculated all cosine-similarities between the token {name_A} with Id_A = {id_A} with the the rest of the {NUM_TOKENS} tokens as a 1x{sorted_dots.shape[0]} tensor')

# Produce a list of IDs that are most similar to the prompt ID at position 1 based on above result

# @markdown Set print options
list_size = 100 # @param {type:'number'}
print_ID = False # @param {type:"boolean"}
print_Similarity = True # @param {type:"boolean"}
print_Name = True # @param {type:"boolean"}
print_Divider = True # @param {type:"boolean"}


if print_Divider:
    print('//---//')

print('')
print('Here is the result : ')
print('')

# Print the sorted list from above result; never read past the end of it.
for rank in range(min(list_size, indices.shape[0])):
    token_id = indices[rank].item()
    if print_Name:
        print(f'{vocab[token_id]}') # vocab item
    if print_ID:
        print(f'ID = {token_id}') # IDs
    if print_Similarity:
        print(f'similiarity = {round(sorted_dots[rank].item()*100,2)} %')
    if print_Divider:
        print('--------')

# %% [markdown]
# The image interrogator below appends CLIP tokens to either end of the 'must_contain' text and seeks to maximize similarity with the image encoding.
#
# It takes a long while to check all the tokens (too long!)
# so this cell only samples a range of the 49K available tokens.
#
# You can run this cell, then paste the result into the 'must_contain' box, and then run the cell again.

# %%
# @title 🪐🖼️ -> 📝 Slow Recursive Token Image interrogator
import random  # unused in this cell; kept in case another cell relies on it
import re

import requests
from google.colab import files
from PIL import Image
from transformers import AutoTokenizer, CLIPModel, CLIPProcessor

USE_IMAGE = '🖼️image_encoding from image'
USE_TEXT = '📝text_encoding from prompt'
# Marker that the CLIP BPE vocab appends to tokens that end a word; tokens
# without it are word prefixes.
END_OF_WORD = '</w>'

# @markdown # What do you want to to mimic?
use = '🖼️image_encoding from image' # @param ['📝text_encoding from prompt', '🖼️image_encoding from image']
# @markdown --------------------------
use_token_padding = True # param {type:"boolean"} <---- Enabled by default
prompt = "photo of a banana" # @param {"type":"string","placeholder":"Write a prompt"}

prompt_A = prompt


def upload_files():
    """Ask for a local upload, write each file to the working directory and return the file names."""
    uploaded = files.upload()
    for file_name, content in uploaded.items():
        with open(file_name, 'wb') as handle:  # close the handle instead of leaking it
            handle.write(content)
    return list(uploaded.keys())

#Get image
# You can use "http://images.cocodataset.org/val2017/000000039769.jpg" for testing
image_url = "http://images.cocodataset.org/val2017/000000039769.jpg" # @param {"type":"string","placeholder":"leave empty for local upload (scroll down to see it)"}


colab_image_path = "" # @param {"type":"string","placeholder": "eval. as '/content/sd_tokens/' + **your input**"}

# @markdown --------------------------
image_A = ""

#----#

if use == USE_IMAGE:
    if image_url == "":
        if colab_image_path == "":
            uploaded_names = upload_files()
            if not uploaded_names:
                raise ValueError("No image was uploaded")
            # As before, the last uploaded file wins.
            colab_image_path = "/content/sd_tokens/" + uploaded_names[-1]
            image_path = colab_image_path
        else:
            image_path = "/content/sd_tokens/" + colab_image_path
        # Load with PIL in RGB. cv2.imread returns BGR data (and None on a bad
        # path), which the CLIP processor would treat as RGB.
        image_A = Image.open(image_path).convert("RGB")
    else:
        image_A = Image.open(requests.get(image_url, stream=True).raw)
#------#

tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14", clean_up_tokenization_spaces = False)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14" , clean_up_tokenization_spaces = True)
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

# Encode the target once. Inference only, so no autograd graph is needed.
with torch.no_grad():
    if use == USE_IMAGE:
        # Get image features
        inputs = processor(images=image_A, return_tensors="pt")
        image_features = model.get_image_features(**inputs)
        image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)
        name_A = "the image"

    if use == USE_TEXT:
        # Get text features
        inputs = tokenizer(text = prompt, padding=True, return_tensors="pt")
        text_features_A = model.get_text_features(**inputs)
        name_A = prompt
#-----#


def similarity_to_target(text):
    """Return the similarity, as a float in percent, between the CLIP text encoding of `text` and the target.

    Image mode: cosine similarity against `image_features`, scaled by
    `model.logit_scale.exp()` as before.
    Text mode: cosine similarity against `text_features_A`, times 100, so the
    '%' printed next to it matches the percent convention of `token_similarity`.
    """
    ids = processor.tokenizer(text=text, padding=use_token_padding, return_tensors="pt")
    with torch.no_grad():
        text_features = model.get_text_features(**ids)
        text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)
        if use == USE_IMAGE:
            scale = model.logit_scale.exp()
            return (torch.nn.functional.cosine_similarity(text_features, image_features) * scale).item()
        return (torch.nn.functional.cosine_similarity(text_features, text_features_A) * 100).item()


# @markdown # The output...
must_start_with = "" # @param {"type":"string","placeholder":"write a text"}
must_contain = "banana " # @param {"type":"string","placeholder":"write a text"}
must_end_with = "" # @param {"type":"string","placeholder":"write a text"}
token_B = must_contain

# @markdown -----

# @markdown # Use a range of tokens from the vocab.json (slow method)
start_search_at_ID = 27700 # @param {type:"slider", min:0, max: 49407, step:100}
search_range = 100 # @param {type:"slider", min:100, max: 2000, step:0}
restrictions = 'None' # @param ["None", "Suffix only", "Prefix only"]

#markdown Limit char size of included token <----- Disabled
min_char_size = 0 #param {type:"slider", min:0, max: 20, step:1}
char_range = 50 #param {type:"slider", min:0, max: 20, step:1}


# markdown # ...or paste prompt items
# markdown Format must be {item1|item2|...}. You can acquire prompt items using the Randomizer in the fusion gen: https://perchance.org/fusion-ai-image-generator
_enable = False # param {"type":"boolean"}
prompt_items = "" # param {"type":"string","placeholder":"{item1|item2|...}"}
#-----#
name_B = must_contain
#-----#

START = start_search_at_ID
RANGE = min(search_range , 49407 - start_search_at_ID)

# -inf marks IDs that were skipped, so they sort last and can be left out of
# the report (a 0 would look like a real score).
dots = torch.full((RANGE,), float('-inf'))
# is_BC[i] == 1 when placing token i after 'must_contain' scored higher than placing it before.
is_BC = torch.zeros(RANGE)

for index in range(RANGE):
    id_C = START + index
    name_C = vocab[id_C]
    is_prefix = END_OF_WORD not in name_C

    # Skip if non-word characters are found
    if re.search(r"\W", name_C.replace(END_OF_WORD, '')):
        continue

    # Decide if we should process prefix/suffix tokens. Prefix tokens are only
    # scored in "Prefix only" mode; "None" and "Suffix only" both score only
    # suffix tokens (unchanged behaviour).
    if is_prefix:
        if restrictions != "Prefix only":
            continue
    elif restrictions == "Prefix only":
        continue

    # Decide if char-size is within range (the length includes the end-of-word marker)
    if len(name_C) < min_char_size:
        continue
    if len(name_C) > min_char_size + char_range:
        continue

    # Build the candidate text from the readable form of the token, so the
    # scored text is the same text that is reported and re-scored in step 2.
    if is_prefix:
        text_C = name_C.strip()
        name_CB = must_start_with + ' ' + text_C + '-' + name_B.strip() + ' ' + must_end_with
    else:
        text_C = name_C.replace(END_OF_WORD, ' ')
        name_CB = must_start_with + text_C + name_B + must_end_with

    sim_CB = similarity_to_target(name_CB)

    if restrictions == "Prefix only":
        dots[index] = sim_CB
        continue

    name_BC = must_start_with + name_B + text_C + must_end_with
    sim_BC = similarity_to_target(name_BC)

    if sim_BC > sim_CB:
        is_BC[index] = 1
        dots[index] = sim_BC
    else:
        dots[index] = sim_CB
#----#

# Named `sorted_dots` so the builtin `sorted` is not shadowed.
sorted_dots, indices = torch.sort(dots, dim=0, descending=True)


# @markdown ----------
# @markdown # Print options
list_size = 100 # @param {type:'number'}
print_ID = False # @param {type:"boolean"}
print_Similarity = True # @param {type:"boolean"}
print_Name = True # @param {type:"boolean"}
print_Divider = True # @param {type:"boolean"}


if print_Divider:
    print('//---//')

print('')
print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the encoding for {name_A} : ')
print('')
#----#
ahead_names = []
trail_names = []
#----#
max_sim_ahead = 0
max_sim_trail = 0
max_name_ahead = ''
max_name_trail = ''
#----#
for rank in range(min(list_size, RANGE)):
    position = indices[rank].item()  # position inside the searched range, not the sort rank
    sim = sorted_dots[rank].item()
    if sim == float('-inf'):
        break  # everything from here on was skipped by the filters above
    name = vocab[START + position]
    #-----#
    if END_OF_WORD not in name:
        name = name + '-'
    else:
        name = name.replace(END_OF_WORD, ' ')
    #----#
    # is_BC is indexed by position in the range: token after 'must_contain' => trailing item.
    if is_BC[position] > 0:
        trail_names.append(name)
        if sim > max_sim_trail:
            max_sim_trail = sim
            max_name_trail = name
    else:
        ahead_names.append(name)
        if sim > max_sim_ahead:
            max_sim_ahead = sim
            max_name_ahead = name

#------#
# '{item1|item2|...}' or an empty string when there are no items.
trails = "{" + "|".join(trail_names) + "}" if trail_names else ""
aheads = "{" + "|".join(ahead_names) + "}" if ahead_names else ""
#-----#
print(f"place these items ahead of prompt : {aheads}")
print("")
print(f"place these items behind the prompt : {trails}")
print("")
print(f"max_similarity = {max_sim_ahead} % when using '{max_name_ahead + must_contain}' ")
print("")
print(f"max_similarity = {max_sim_trail} % when using '{must_contain + max_name_trail}' ")
#-----#
#STEP 2
# Re-score the four combinations of the best leading and trailing item.
NUM_PERMUTATIONS = 4 # 0 1 2 3
combinations = [
    must_contain,
    max_name_ahead + must_contain,
    must_contain + max_name_trail,
    max_name_ahead + must_contain + max_name_trail,
]
names = {index: must_start_with + text + must_end_with for index, text in enumerate(combinations)}
dots = torch.tensor([similarity_to_target(names[index]) for index in range(NUM_PERMUTATIONS)])

#------#

sorted_dots, indices = torch.sort(dots, dim=0, descending=True)

for rank in range(NUM_PERMUTATIONS):
    print(names[indices[rank].item()])
    print(f'similiarity = {round(sorted_dots[rank].item(),2)} %')
    print('------')

# %%
# @title 💫 Compare Text encodings

prompt_A = "banana" # @param {"type":"string","placeholder":"Write a prompt"}
prompt_B = "" # @param {"type":"string","placeholder":"Write a prompt"}
use_token_padding = True # @param {type:"boolean"}

from transformers import CLIPProcessor, CLIPModel

processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14" , clean_up_tokenization_spaces = True)

model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

# Inference only, so no autograd graph is needed.
with torch.no_grad():
    ids_A = processor.tokenizer(text=prompt_A, padding=use_token_padding, return_tensors="pt")
    text_encoding_A = model.get_text_features(**ids_A)

    ids_B = processor.tokenizer(text=prompt_B, padding=use_token_padding, return_tensors="pt")
    text_encoding_B = model.get_text_features(**ids_B)

# Each prompt is encoded on its own, so row 0 is its only encoding.
similarity_str = 'The similarity between the text_encoding for A:"' + prompt_A + '" and B: "' + prompt_B +'" is ' + token_similarity(text_encoding_A[0] , text_encoding_B[0])


print(similarity_str)
# %% [markdown]
# You can write a URL or upload a file locally from your device to use as reference. The image will be saved in the 'sd_tokens' folder. Note that the 'sd_tokens' folder will be deleted upon exiting this runtime.

# %% [markdown]
# # ↓ Sub modules (use these to build your own projects) ↓

# %%
# @title 📝 -> 🆔 Tokenize prompt into IDs
from transformers import AutoTokenizer

# CLIP tokenizer; tokenization-space clean-up is switched off.
tokenizer = AutoTokenizer.from_pretrained(
    "openai/clip-vit-large-patch14",
    clean_up_tokenization_spaces=False,
)

prompt= "banana" # @param {type:'string'}

# Tokenize the prompt and show the resulting list of IDs.
tokenizer_output = tokenizer(text=prompt)
input_ids = tokenizer_output["input_ids"]
print(input_ids)

# The prompt is wrapped in the <|start-of-text|> and <|end-of-text|> tokens,
# which is why the output looks like [49406, ... , 49407].
#
# Leaving the 'prompt' field empty gives a random value tensor further down.
# Because that tensor is random it matches no entry in the vocab.json list,
# and therefore has no ID.
# %%
# @title 🆔->🥢 Take the ID at index 1 from above result and get its corresponding tensor value

# Only the token right after the start-of-text token (index 1) is used.
id_A = input_ids[1]
A = token[id_A]
_A = LA.vector_norm(A, ord=2)

# If no input exists we just randomize the entire thing
if prompt == "":
    id_A = -1
    print("Tokenized prompt tensor A is a random valued tensor with no ID")
    # rand_like keeps the shape, dtype and device of the token tensors; a plain
    # torch.rand(768) lives on the CPU and cannot be combined with `token`
    # when that was loaded onto CUDA.
    R = torch.rand_like(A)
    _R = LA.vector_norm(R, ord=2)
    A = R*(_A/_R)  # rescale the noise to the norm of the token it replaces

# Save a copy of the tensor A
id_P = id_A
P = A
_P = LA.vector_norm(A, ord=2)

# %%
# @title 🥢 -> 🥢🔀 Take the ID at index 1 from above result and modify it (optional)
mix_with = "" # @param {type:'string'}
mix_method = "None" # @param ["None" , "Average", "Subtract"] {allow-input: true}
w = 0.5 # @param {type:"slider", min:0, max:1, step:0.01}

#------#
# If set to TRUE, this will use the output of this cell, tensor A, as the input of this cell the 2nd time we run it. Use this feature to mix many tokens into A
re_iterate_tensor_A = True # @param {"type":"boolean"}
if not re_iterate_tensor_A:
    # prevent re-iterating A by reading from stored copy
    id_A = id_P
    A = P
    _A = _P
#----#

# Tokenize into a separate name: overwriting `input_ids` would make a re-run of
# the previous cell silently pick up the 'mix_with' token instead of the prompt.
mix_input_ids = tokenizer(text = mix_with)['input_ids']
id_C = mix_input_ids[1]
C = token[id_C]
_C = LA.vector_norm(C, ord=2)

# If no input exists we just randomize the entire thing
if mix_with == "":
    id_C = -1
    print("Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID")
    R = torch.rand_like(C)
    _R = LA.vector_norm(R, ord=2)
    C = R*(_C/_R)

if mix_method == "None":
    print("No operation")
elif mix_method == "Average":
    A = w*A + (1-w)*C
    _A = LA.vector_norm(A, ord=2)
    print("Tokenized prompt tensor A has been recalculated as A = w*A + (1-w)*C , where C is the tokenized prompt 'mix_with' tensor C")
elif mix_method == "Subtract":
    # Difference of the two unit vectors, rescaled to the weighted mean of both norms.
    tmp = (A/_A) - (C/_C)
    _tmp = LA.vector_norm(tmp, ord=2)
    A = tmp*((w*_A + (1-w)*_C)/_tmp)
    _A = LA.vector_norm(A, ord=2)
    # The message now states the formula that is actually computed above.
    print("Tokenized prompt tensor A has been recalculated as A = (w*_A + (1-w)*_C) * norm(A/_A - C/_C) , where C is the tokenized prompt 'mix_with' tensor C")

# OPTIONAL : Add/subtract + normalize above result with another token. Leave field empty to get a random value tensor

# %%
# @title 🥢->🧾🥢 Find Similiar Tokens to ID at index 1 from above result

# Cosine similarity between A and the first NUM_TOKENS token tensors, computed
# in one vectorized call instead of a Python loop over every ID.
dots = torch.nn.functional.cosine_similarity(token[:NUM_TOKENS], A.unsqueeze(0), dim=1).cpu()

# An ID of -1 marks a random tensor that has no vocab entry.
name_A = vocab[id_A] if id_A > -1 else "A of random type"
name_C = vocab[id_C] if id_C > -1 else "token C of random type"

# Named `sorted_dots` so the builtin `sorted` is not shadowed for the rest of the session.
sorted_dots, indices = torch.sort(dots, dim=0, descending=True)
#----#
if mix_method == "Average":
    print(f'Calculated all cosine-similarities between the average of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted_dots.shape[0]} tensor')
if mix_method == "Subtract":
    print(f'Calculated all cosine-similarities between the subtract of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted_dots.shape[0]} tensor')
if mix_method == "None":
    print(f'Calculated all cosine-similarities between the token {name_A} with Id_A = {id_A} with the the rest of the {NUM_TOKENS} tokens as a 1x{sorted_dots.shape[0]} tensor')

# Produce a list of IDs that are most similar to the prompt ID at position 1 based on above result

# %%
# @title 🥢🧾 -> 🖨️ Print Result from the 'Similiar Tokens' list from above result
list_size = 100 # @param {type:'number'}
print_ID = False # @param {type:"boolean"}
print_Similarity = True # @param {type:"boolean"}
print_Name = True # @param {type:"boolean"}
print_Divider = True # @param {type:"boolean"}

# Print the sorted list from above result; never read past the end of it.
for rank in range(min(list_size, indices.shape[0])):
    token_id = indices[rank].item()
    if print_Name:
        print(f'{vocab[token_id]}') # vocab item
    if print_ID:
        print(f'ID = {token_id}') # IDs
    if print_Similarity:
        print(f'similiarity = {round(sorted_dots[rank].item()*100,2)} %') # % value
    if print_Divider:
        print('--------')

# %%
# @title 🆔 Get similarity % of two token IDs
id_for_token_A = 4567 # @param {type:'number'}
id_for_token_B = 4343 # @param {type:'number'}

similarity_str = 'The similarity between tokens A and B is ' + similarity(id_for_token_A , id_for_token_B)

print(similarity_str)

# Valid ID ranges for id_for_token_A / id_for_token_B are between 0 and 49407

# %% [markdown]
# # How does this notebook work?
#
# Similar vectors = similar output in the SD 1.5 / SDXL / FLUX model
#
# CLIP converts the prompt text to vectors (“tensors”), with float32 values usually ranging from -1 to 1.
#
# Dimensions are \[ 1x768 ] tensors for SD 1.5 , and a \[ 1x768 , 1x1024 ] tensor for SDXL and FLUX.
#
# The SD models and FLUX convert these vectors to an image.
#
# This notebook takes an input string, tokenizes it and matches the first token against the 49407 token vectors in the vocab.json :
[https://huggingface.co/black-forest-labs/FLUX.1-dev/tree/main/tokenizer](https://www.google.com/url?q=https%3A%2F%2Fhuggingface.co%2Fblack-forest-labs%2FFLUX.1-dev%2Ftree%2Fmain%2Ftokenizer)\n", "\n", "It finds the “most similar tokens” in the list. Similarity is the cosine of the angle (theta) between the token vectors.\n", "\n", "