{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "This Notebook is a Stable-diffusion tool which allows you to find similiar tokens from the SD 1.5 vocab.json that you can use for text-to-image generation. Try this Free online SD 1.5 generator with the results: https://perchance.org/fusion-ai-image-generator\n", "\n", "Scroll to the bottom of the notebook to see the guide for how this works." ], "metadata": { "id": "L7JTcbOdBPfh" } }, { "cell_type": "code", "source": [ "# @title ✳️ Load/initialize values\n", "# Load the tokens into the colab\n", "!git clone https://huggingface.co/datasets/codeShare/sd_tokens\n", "import torch\n", "from torch import linalg as LA\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", "%cd /content/sd_tokens\n", "token = torch.load('sd15_tensors.pt', map_location=device, weights_only=True)\n", "#-----#\n", "\n", "#Import the vocab.json\n", "import json\n", "import pandas as pd\n", "with open('vocab.json', 'r') as f:\n", " data = json.load(f)\n", "\n", "_df = pd.DataFrame({'count': data})['count']\n", "\n", "vocab = {\n", " value: key for key, value in _df.items()\n", "}\n", "#-----#\n", "\n", "# Define functions/constants\n", "NUM_TOKENS = 49407\n", "\n", "def absolute_value(x):\n", " return max(x, -x)\n", "\n", "\n", "def token_similarity(A, B):\n", "\n", " #Vector length#\n", " _A = LA.vector_norm(A, ord=2)\n", " _B = LA.vector_norm(B, ord=2)\n", "\n", " #----#\n", " result = torch.dot(A,B)/(_A*_B)\n", " #similarity_pcnt = absolute_value(result.item()*100)\n", " similarity_pcnt = result.item()*100\n", " similarity_pcnt_aprox = round(similarity_pcnt, 3)\n", " result = f'{similarity_pcnt_aprox} %'\n", " return result\n", "\n", "\n", "def similarity(id_A , id_B):\n", " #Tensors\n", " A = token[id_A]\n", " B = token[id_B]\n", " return token_similarity(A, B)\n", "#----#\n", "\n", "#print(vocab[8922]) #the vocab item for ID 8922\n", "#print(token[8922].shape) #dimension of the token\n", "\n", "mix_with = \"\"\n", "mix_method = \"None\"\n", "\n", "#-------------#\n", "# UNUSED\n", "\n", "# Get the 10 lowest values from a tensor as a string\n", "def get_valleys (A):\n", " sorted, indices = torch.sort(A,dim=0 , descending=False)\n", " result = \"{\"\n", " for index in range(10):\n", " id = indices[index].item()\n", " result = result + f\"{id}\"\n", " if(index<9):\n", " result = result + \",\"\n", " result = result + \"}\"\n", " return result\n", "\n", "# Get the 10 highest values from a tensor as a string\n", "def get_peaks (A):\n", " sorted, indices = torch.sort(A,dim=0 , descending=True)\n", " result = \"{\"\n", " for index in range(10):\n", " id = indices[index].item()\n", " result = result + f\"{id}\"\n", " if(index<9):\n", " result = result + \",\"\n", " result = result + \"}\"\n", " return result" ], "metadata": { "id": "Ch9puvwKH1s3", "collapsed": true, "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title ⚡ Get similiar tokens\n", "from transformers import AutoTokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n", "\n", "# @markdown Write name of token to match against\n", "prompt= \"banana\" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n", "\n", "tokenizer_output = tokenizer(text = prompt)\n", "input_ids = tokenizer_output['input_ids']\n", "print(input_ids)\n", "\n", "\n", "#The prompt will be enclosed with the <|start-of-text|> and <|end-of-text|> tokens, which is why output will be [49406, ... , 49407].\n", "\n", "#You can leave the 'prompt' field empty to get a random value tensor. Since the tensor is random value, it will not correspond to any tensor in the vocab.json list , and this it will have no ID.\n", "\n", "id_A = input_ids[1]\n", "A = token[id_A]\n", "_A = LA.vector_norm(A, ord=2)\n", "\n", "#if no imput exists we just randomize the entire thing\n", "if (prompt == \"\"):\n", " id_A = -1\n", " print(\"Tokenized prompt tensor A is a random valued tensor with no ID\")\n", " R = torch.rand(768)\n", " _R = LA.vector_norm(R, ord=2)\n", " A = R*(_A/_R)\n", " name_A = 'random_A'\n", "\n", "# @markdown (optional) Mix the token with something else\n", "mix_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for random value token\"}\n", "mix_method = \"None\" # @param [\"None\" , \"Average\", \"Subtract\"] {allow-input: true}\n", "w = 0.5 # @param {type:\"slider\", min:0, max:1, step:0.01}\n", "\n", "# @markdown Limit char size of included token\n", "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n", "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n", "\n", "tokenizer_output = tokenizer(text = mix_with)\n", "input_ids = tokenizer_output['input_ids']\n", "id_C = input_ids[1]\n", "C = token[id_C]\n", "_C = LA.vector_norm(C, ord=2)\n", "\n", "#if no imput exists we just randomize the entire thing\n", "if (mix_with == \"\"):\n", " id_C = -1\n", " print(\"Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID\")\n", " R = torch.rand(768)\n", " _R = LA.vector_norm(R, ord=2)\n", " C = R*(_C/_R)\n", " name_C = 'random_C'\n", "\n", "name_A = \"A of random type\"\n", "if (id_A>-1):\n", " name_A = vocab[id_A]\n", "\n", "name_C = \"token C of random type\"\n", "if (id_C>-1):\n", " name_C = vocab[id_C]\n", "\n", "# Peaks feature\n", "#peaks_A = get_valleys(A)\n", "#peaks_C = get_valleys(C)\n", "#print(f\"The elementwise top 10 highest values for A is at indices {peaks_A}\")\n", "#print(\"-------\")\n", "#print(f\"The elementwise top 10 highest values for C is at indices {peaks_C}\")\n", "#print(\"-------\")\n", "#//------//\n", "\n", "print(f\"The similarity between A '{name_A}' and C '{name_C}' is {token_similarity(A, C)}\")\n", "\n", "if (mix_method == \"None\"):\n", " print(\"No operation\")\n", "\n", "if (mix_method == \"Average\"):\n", " A = w*A + (1-w)*C\n", " _A = LA.vector_norm(A, ord=2)\n", " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = w*A + (1-w)*C , where C is '{name_C}' token , for w = {w} \")\n", "\n", "if (mix_method == \"Subtract\"):\n", " tmp = w*A - (1-w)*C\n", " _tmp = LA.vector_norm(tmp, ord=2)\n", " A = (_A/_tmp)*tmp\n", " #//---//\n", " _A = LA.vector_norm(A, ord=2)\n", " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = _A*norm(w*A - (1-w)*C) , where C is '{name_C}' token , for w = {w} \")\n", "\n", "#OPTIONAL : Add/subtract + normalize above result with another token. Leave field empty to get a random value tensor\n", "\n", "dots = torch.zeros(NUM_TOKENS)\n", "for index in range(NUM_TOKENS):\n", " id_B = index\n", " B = token[id_B]\n", " _B = LA.vector_norm(B, ord=2)\n", " result = torch.dot(A,B)/(_A*_B)\n", " #result = absolute_value(result.item())\n", " result = result.item()\n", " dots[index] = result\n", "\n", "\n", "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n", "#----#\n", "if (mix_method == \"Average\"):\n", " print(f'Calculated all cosine-similarities between the average of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n", "if (mix_method == \"Subtract\"):\n", " print(f'Calculated all cosine-similarities between the subtract of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n", "if (mix_method == \"None\"):\n", " print(f'Calculated all cosine-similarities between the token {name_A} with Id_A = {id_A} with the the rest of the {NUM_TOKENS} tokens as a 1x{sorted.shape[0]} tensor')\n", "\n", "#Produce a list id IDs that are most similiar to the prompt ID at positiion 1 based on above result\n", "\n", "# @markdown Set print options\n", "list_size = 100 # @param {type:'number'}\n", "print_ID = False # @param {type:\"boolean\"}\n", "print_Similarity = True # @param {type:\"boolean\"}\n", "print_Name = True # @param {type:\"boolean\"}\n", "print_Divider = True # @param {type:\"boolean\"}\n", "\n", "\n", "if (print_Divider):\n", " print('//---//')\n", "\n", "print('')\n", "print('Here is the result : ')\n", "print('')\n", "\n", "for index in range(list_size):\n", " id = indices[index].item()\n", " if (print_Name):\n", " print(f'{vocab[id]}') # vocab item\n", " if (print_ID):\n", " print(f'ID = {id}') # IDs\n", " if (print_Similarity):\n", " print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n", " if (print_Divider):\n", " print('--------')\n", "\n", "#Print the sorted list from above result" ], "metadata": { "id": "iWeFnT1gAx6A", "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title 💫 Compare Text encodings\n", "\n", "prompt_A = \"banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n", "prompt_B = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n", "use_token_padding = True # @param {type:\"boolean\"}\n", "\n", "from transformers import CLIPProcessor, CLIPModel\n", "\n", "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n", "\n", "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n", "\n", "ids_A = processor.tokenizer(text=prompt_A, padding=use_token_padding, return_tensors=\"pt\")\n", "text_encoding_A = model.get_text_features(**ids_A)\n", "\n", "\n", "ids_B = processor.tokenizer(text=prompt_B, padding=use_token_padding, return_tensors=\"pt\")\n", "text_encoding_B = model.get_text_features(**ids_B)\n", "\n", "similarity_str = 'The similarity between the text_encoding for A:\"' + prompt_A + '\" and B: \"' + prompt_B +'\" is ' + token_similarity(text_encoding_A[0] , text_encoding_B[0])\n", "\n", "\n", "print(similarity_str)\n", "#outputs = model(**inputs)\n", "#logits_per_image = outputs.logits_per_image # this is the image-text similarity score\n", "#probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities" ], "metadata": { "id": "QQOjh5BvnG8M", "collapsed": true, "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title 🪐🖼️ -> 📝 Image to prompt : Add single token to existing prompt to match image\n", "from google.colab import files\n", "def getLocalFiles():\n", " _files = files.upload()\n", " if len(_files) >0:\n", " for k,v in _files.items():\n", " open(k,'wb').write(v)\n", "\n", "#Get image\n", "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n", "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload\"}\n", "from PIL import Image\n", "import requests\n", "if url == \"\":\n", " image_A = getLocalFiles()\n", "else:\n", " image_A = Image.open(requests.get(url, stream=True).raw)\n", "\n", "\n", "# Get image features\n", "from transformers import CLIPProcessor, CLIPModel\n", "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n", "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n", "inputs = processor(images=image_A, return_tensors=\"pt\")\n", "image_features = model.get_image_features(**inputs)\n", "text_encoding_A = image_features\n", "A = text_encoding_A[0]\n", "_A = LA.vector_norm(A, ord=2)\n", "prompt_A = \"the image\"\n", "name_A = prompt_A\n", "#-----#\n", "\n", "# @markdown Set conditions for the output\n", "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n", "must_contain = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n", "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n", "token_B = must_contain\n", "\n", "# @markdown Limit the search\n", "use_token_padding = True # @param {type:\"boolean\"}\n", "start_search_at_ID = 12500 # @param {type:\"slider\", min:0, max: 49407, step:100}\n", "search_range = 500 # @param {type:\"slider\", min:0, max: 2000, step:100}\n", "restrictions = 'Suffix only' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n", "\n", "# @markdown Limit char size of included token\n", "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n", "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n", "\n", "#Tokenize input B\n", "from transformers import AutoTokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n", "tokenizer_output = tokenizer(text = token_B)\n", "input_ids = tokenizer_output['input_ids']\n", "#-----#\n", "name_B = must_contain\n", "#-----#\n", "\n", "START = start_search_at_ID\n", "RANGE = min(search_range , 49407 - start_search_at_ID)\n", "\n", "dots = torch.zeros(RANGE)\n", "is_BC = torch.zeros(RANGE)\n", "for index in range(RANGE):\n", " id_C = START + index\n", " C = token[id_C]\n", " _C = LA.vector_norm(C, ord=2)\n", " name_C = vocab[id_C]\n", "\n", " # Decide if we should process prefix/suffix tokens\n", " if name_C.find('')<=-1:\n", " if restrictions != \"Prefix only\":\n", " continue\n", " else:\n", " if restrictions == \"Prefix only\":\n", " continue\n", " #-----#\n", "\n", " # Decide if char-size is within range\n", " if len(name_C) < min_char_size:\n", " continue\n", " if len(name_C) > min_char_size + char_range:\n", " continue\n", " #-----#\n", "\n", " name_CB = must_start_with + name_C + name_B + must_end_with\n", " if restrictions == \"Prefix only\":\n", " name_CB = must_start_with + name_C + '-' + name_B + must_end_with\n", " #-----#\n", " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n", " text_encoding_CB = model.get_text_features(**ids_CB)\n", " CB = text_encoding_CB[0]\n", " _CB = LA.vector_norm(CB, ord=2)\n", " sim_CB = torch.dot(A,CB)/(_A*_CB)\n", " #-----#\n", " if restrictions == \"Prefix only\":\n", " result = sim_CB\n", " result = result.item()\n", " dots[index] = result\n", " continue\n", " #-----#\n", " name_BC = must_start_with + name_B + name_C + must_end_with\n", " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n", " text_encoding_BC = model.get_text_features(**ids_BC)\n", " BC = text_encoding_BC[0]\n", " _BC = LA.vector_norm(BC, ord=2)\n", " sim_BC = torch.dot(A,BC)/(_A*_BC)\n", " #-----#\n", "\n", " result = sim_CB\n", " if(sim_BC > sim_CB):\n", " is_BC[index] = 1\n", " result = sim_BC\n", "\n", " #result = absolute_value(result.item())\n", " result = result.item()\n", " dots[index] = result\n", "#----#\n", "\n", "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n", "\n", "# @markdown Print options\n", "list_size = 100 # @param {type:'number'}\n", "print_ID = False # @param {type:\"boolean\"}\n", "print_Similarity = True # @param {type:\"boolean\"}\n", "print_Name = True # @param {type:\"boolean\"}\n", "print_Divider = True # @param {type:\"boolean\"}\n", "\n", "\n", "if (print_Divider):\n", " print('//---//')\n", "\n", "print('')\n", "print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the text_encoding for {prompt_A} : ')\n", "print('')\n", "\n", "for index in range(min(list_size,RANGE)):\n", " id = START + indices[index].item()\n", " if (print_Name):\n", " if(is_BC[index]>0):\n", " print(must_start_with + name_B + vocab[id] + must_end_with)\n", " else:\n", " if restrictions == \"Prefix only\":\n", " print(must_start_with + vocab[id] + '-' + name_B + must_end_with)\n", " else:\n", " print(must_start_with + vocab[id] + name_B + must_end_with)\n", " if (print_ID):\n", " print(f'ID = {id}') # IDs\n", " if (print_Similarity):\n", " print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n", " if (print_Divider):\n", " print('--------')\n", "\n", "\n", "\n", "\n", "\n" ], "metadata": { "collapsed": true, "cellView": "form", "id": "fi0jRruI0-tu", "outputId": "6d7e8c39-a117-4b35-acfe-2a128c65aeb7", "colab": { "base_uri": "https://localhost:8080/" } }, "execution_count": 9, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "//---//\n", "\n", "These token pairings within the range ID = 12500 to ID = 13000 most closely match the text_encoding for the prompt \"the image\" : \n", "\n", "sitsyellow\n", "similiarity = 23.02 %\n", "--------\n", "neysyellow\n", "similiarity = 19.74 %\n", "--------\n", "codyyellow\n", "similiarity = 18.61 %\n", "--------\n", "wnsyellow\n", "similiarity = 18.43 %\n", "--------\n", "javayellow\n", "similiarity = 18.15 %\n", "--------\n", "jjyellow\n", "similiarity = 18.03 %\n", "--------\n", "enoyellow\n", "similiarity = 17.87 %\n", "--------\n", "cledyellow\n", "similiarity = 17.85 %\n", "--------\n", "nomyellow\n", "similiarity = 17.75 %\n", "--------\n", "dadsyellow\n", "similiarity = 17.5 %\n", "--------\n", "milyellow\n", "similiarity = 17.47 %\n", "--------\n", "whomyellow\n", "similiarity = 17.37 %\n", "--------\n", "itvyellow\n", "similiarity = 17.34 %\n", "--------\n", "vibeyellow\n", "similiarity = 17.2 %\n", "--------\n", "noiryellow\n", "similiarity = 17.14 %\n", "--------\n", "yellowarel\n", "similiarity = 17.1 %\n", "--------\n", "#âĢ¦yellow\n", "similiarity = 17.04 %\n", "--------\n", "mayayellow\n", "similiarity = 17.03 %\n", "--------\n", "yellowbam\n", "similiarity = 17.01 %\n", "--------\n", "ertsyellow\n", "similiarity = 17.01 %\n", "--------\n", "xcyellow\n", "similiarity = 16.98 %\n", "--------\n", "mobyellow\n", "similiarity = 16.89 %\n", "--------\n", "deesyellow\n", "similiarity = 16.87 %\n", "--------\n", "iccyellow\n", "similiarity = 16.75 %\n", "--------\n", "alyyellow\n", "similiarity = 16.63 %\n", "--------\n", "lisyellow\n", "similiarity = 16.63 %\n", "--------\n", "yellowturf\n", "similiarity = 16.62 %\n", "--------\n", "yellowbaba\n", "similiarity = 16.58 %\n", "--------\n", ":*yellow\n", "similiarity = 16.42 %\n", "--------\n", "inhoyellow\n", "similiarity = 16.39 %\n", "--------\n", "yellowhes\n", "similiarity = 16.37 %\n", "--------\n", "nityyellow\n", "similiarity = 16.3 %\n", "--------\n", "lustyellow\n", "similiarity = 16.3 %\n", "--------\n", "ikhyellow\n", "similiarity = 16.26 %\n", "--------\n", "nytyellow\n", "similiarity = 16.24 %\n", "--------\n", "(+yellow\n", "similiarity = 16.11 %\n", "--------\n", "fotoyellow\n", "similiarity = 16.11 %\n", "--------\n", "stlyellow\n", "similiarity = 16.06 %\n", "--------\n", "mickyellow\n", "similiarity = 16.06 %\n", "--------\n", "...@yellow\n", "similiarity = 16.05 %\n", "--------\n", "ughyellow\n", "similiarity = 16.05 %\n", "--------\n", "groyellow\n", "similiarity = 16.01 %\n", "--------\n", "wskiyellow\n", "similiarity = 16.01 %\n", "--------\n", "ðŁĴ«yellow\n", "similiarity = 15.74 %\n", "--------\n", "deenyellow\n", "similiarity = 15.73 %\n", "--------\n", "assyyellow\n", "similiarity = 15.72 %\n", "--------\n", "mtvyellow\n", "similiarity = 15.72 %\n", "--------\n", "yellowðŁĺ»\n", "similiarity = 15.72 %\n", "--------\n", "yellowfrm\n", "similiarity = 15.65 %\n", "--------\n", "mossyellow\n", "similiarity = 15.64 %\n", "--------\n", "bartyellow\n", "similiarity = 15.61 %\n", "--------\n", "twyellow\n", "similiarity = 15.51 %\n", "--------\n", "yellowplug\n", "similiarity = 15.46 %\n", "--------\n", "jenyellow\n", "similiarity = 15.45 %\n", "--------\n", "pstyellow\n", "similiarity = 15.43 %\n", "--------\n", "omfgyellow\n", "similiarity = 15.43 %\n", "--------\n", "dineyellow\n", "similiarity = 15.38 %\n", "--------\n", "vernyellow\n", "similiarity = 15.33 %\n", "--------\n", "renoyellow\n", "similiarity = 15.25 %\n", "--------\n", "yellow´\n", "similiarity = 15.14 %\n", "--------\n", "omicyellow\n", "similiarity = 15.14 %\n", "--------\n", "łï¸ıyellow\n", "similiarity = 15.11 %\n", "--------\n", "yellowgis\n", "similiarity = 15.06 %\n", "--------\n", "auntyellow\n", "similiarity = 15.0 %\n", "--------\n", "joanyellow\n", "similiarity = 14.96 %\n", "--------\n", "anasyellow\n", "similiarity = 14.92 %\n", "--------\n", "ðŁĴĵyellow\n", "similiarity = 14.9 %\n", "--------\n", "chadyellow\n", "similiarity = 14.89 %\n", "--------\n", "yellowsake\n", "similiarity = 14.88 %\n", "--------\n", "guesyellow\n", "similiarity = 14.84 %\n", "--------\n", "gianyellow\n", "similiarity = 14.84 %\n", "--------\n", "asiyellow\n", "similiarity = 14.83 %\n", "--------\n", "yellowoven\n", "similiarity = 14.82 %\n", "--------\n", "juryyellow\n", "similiarity = 14.79 %\n", "--------\n", "blvdyellow\n", "similiarity = 14.75 %\n", "--------\n", "omezyellow\n", "similiarity = 14.72 %\n", "--------\n", "yellowyang\n", "similiarity = 14.7 %\n", "--------\n", "guyellow\n", "similiarity = 14.48 %\n", "--------\n", "yellowova\n", "similiarity = 14.45 %\n", "--------\n", "yellowinez\n", "similiarity = 14.44 %\n", "--------\n", "peiyellow\n", "similiarity = 14.44 %\n", "--------\n", "ãĢIJyellow\n", "similiarity = 14.43 %\n", "--------\n", "ãĢijyellow\n", "similiarity = 14.43 %\n", "--------\n", "ðŁĮŀyellow\n", "similiarity = 14.36 %\n", "--------\n", "ðŁĺĿyellow\n", "similiarity = 14.27 %\n", "--------\n", "troyyellow\n", "similiarity = 14.16 %\n", "--------\n", "paleyellow\n", "similiarity = 14.14 %\n", "--------\n", "boiyellow\n", "similiarity = 14.11 %\n", "--------\n", "nnyellow\n", "similiarity = 14.08 %\n", "--------\n", "âı°yellow\n", "similiarity = 14.01 %\n", "--------\n", "oothyellow\n", "similiarity = 13.93 %\n", "--------\n", "piedyellow\n", "similiarity = 13.9 %\n", "--------\n", "bolayellow\n", "similiarity = 13.79 %\n", "--------\n", "âŀ¡yellow\n", "similiarity = 13.77 %\n", "--------\n", "renayellow\n", "similiarity = 13.75 %\n", "--------\n", "dleyyellow\n", "similiarity = 13.73 %\n", "--------\n", "evanyellow\n", "similiarity = 13.67 %\n", "--------\n", "ponyyellow\n", "similiarity = 13.63 %\n", "--------\n", "reneyellow\n", "similiarity = 13.62 %\n", "--------\n", "mockyellow\n", "similiarity = 13.57 %\n", "--------\n" ] } ] }, { "cell_type": "code", "source": [ "# @title 🪐📝 Prompt to prompt : Add single token to existing prompt to match another prompt\n", "# @markdown Write a text to match against...\n", "prompt_A = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n", "\n", "# @markdown Set conditions for the output\n", "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n", "must_contain = \"yellow\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n", "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n", "token_B = must_contain\n", "\n", "# @markdown Limit the search\n", "use_token_padding = True # @param {type:\"boolean\"}\n", "start_search_at_ID = 12500 # @param {type:\"slider\", min:0, max: 49407, step:100}\n", "search_range = 500 # @param {type:\"slider\", min:0, max: 2000, step:100}\n", "restrictions = 'Suffix only' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n", "\n", "# @markdown Limit char size of included token\n", "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n", "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n", "\n", "#Tokenize input B\n", "from transformers import AutoTokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n", "tokenizer_output = tokenizer(text = token_B)\n", "input_ids = tokenizer_output['input_ids']\n", "#-----#\n", "name_B = must_contain\n", "#-----#\n", "\n", "from transformers import CLIPProcessor, CLIPModel\n", "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n", "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n", "#-------#\n", "ids_A = processor.tokenizer(text=prompt_A, padding=use_token_padding, return_tensors=\"pt\")\n", "text_encoding_A = model.get_text_features(**ids_A)\n", "A = text_encoding_A[0]\n", "_A = LA.vector_norm(A, ord=2)\n", "name_A = prompt_A\n", "print(f'a text_encoding was created for the prompt \"{prompt_A}\" ')\n", "print('')\n", "#----#\n", "\n", "START = start_search_at_ID\n", "RANGE = min(search_range , 49407 - start_search_at_ID)\n", "\n", "dots = torch.zeros(RANGE)\n", "is_BC = torch.zeros(RANGE)\n", "for index in range(RANGE):\n", " id_C = START + index\n", " C = token[id_C]\n", " _C = LA.vector_norm(C, ord=2)\n", " name_C = vocab[id_C]\n", "\n", " # Decide if we should process prefix/suffix tokens\n", " if name_C.find('')<=-1:\n", " if restrictions != \"Prefix only\":\n", " continue\n", " else:\n", " if restrictions == \"Prefix only\":\n", " continue\n", " #-----#\n", "\n", " # Decide if char-size is within range\n", " if len(name_C) < min_char_size:\n", " continue\n", " if len(name_C) > min_char_size + char_range:\n", " continue\n", " #-----#\n", "\n", " name_CB = must_start_with + name_C + name_B + must_end_with\n", " if restrictions == \"Prefix only\":\n", " name_CB = must_start_with + name_C + '-' + name_B + must_end_with\n", " #-----#\n", " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n", " text_encoding_CB = model.get_text_features(**ids_CB)\n", " CB = text_encoding_CB[0]\n", " _CB = LA.vector_norm(CB, ord=2)\n", " sim_CB = torch.dot(A,CB)/(_A*_CB)\n", " #-----#\n", " if restrictions == \"Prefix only\":\n", " result = sim_CB\n", " result = result.item()\n", " dots[index] = result\n", " continue\n", " #-----#\n", " name_BC = must_start_with + name_B + name_C + must_end_with\n", " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n", " text_encoding_BC = model.get_text_features(**ids_BC)\n", " BC = text_encoding_BC[0]\n", " _BC = LA.vector_norm(BC, ord=2)\n", " sim_BC = torch.dot(A,BC)/(_A*_BC)\n", " #-----#\n", "\n", " result = sim_CB\n", " if(sim_BC > sim_CB):\n", " is_BC[index] = 1\n", " result = sim_BC\n", "\n", " #result = absolute_value(result.item())\n", " result = result.item()\n", " dots[index] = result\n", "#----#\n", "\n", "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n", "\n", "# @markdown Print options\n", "list_size = 100 # @param {type:'number'}\n", "print_ID = False # @param {type:\"boolean\"}\n", "print_Similarity = True # @param {type:\"boolean\"}\n", "print_Name = True # @param {type:\"boolean\"}\n", "print_Divider = True # @param {type:\"boolean\"}\n", "\n", "\n", "if (print_Divider):\n", " print('//---//')\n", "\n", "print('')\n", "print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the text_encoding for the prompt \"{prompt_A}\" : ')\n", "print('')\n", "\n", "for index in range(min(list_size,RANGE)):\n", " id = START + indices[index].item()\n", " if (print_Name):\n", " if(is_BC[index]>0):\n", " print(must_start_with + name_B + vocab[id] + must_end_with)\n", " else:\n", " if restrictions == \"Prefix only\":\n", " print(must_start_with + vocab[id] + '-' + name_B + must_end_with)\n", " else:\n", " print(must_start_with + vocab[id] + name_B + must_end_with)\n", " if (print_ID):\n", " print(f'ID = {id}') # IDs\n", " if (print_Similarity):\n", " print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n", " if (print_Divider):\n", " print('--------')" ], "metadata": { "cellView": "form", "id": "uDtcm-l8UCJk" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# ↓ Sub modules (use these to build your own projects) ↓" ], "metadata": { "id": "_d8WtPgtAymM" } }, { "cell_type": "code", "source": [ "# @title 📝 -> 🆔 Tokenize prompt into IDs\n", "from transformers import AutoTokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n", "\n", "prompt= \"banana\" # @param {type:'string'}\n", "\n", "tokenizer_output = tokenizer(text = prompt)\n", "input_ids = tokenizer_output['input_ids']\n", "print(input_ids)\n", "\n", "\n", "#The prompt will be enclosed with the <|start-of-text|> and <|end-of-text|> tokens, which is why output will be [49406, ... , 49407].\n", "\n", "#You can leave the 'prompt' field empty to get a random value tensor. Since the tensor is random value, it will not correspond to any tensor in the vocab.json list , and this it will have no ID." ], "metadata": { "id": "RPdkYzT2_X85", "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title 🆔->🥢 Take the ID at index 1 from above result and get its corresponding tensor value\n", "\n", "id_A = input_ids[1]\n", "A = token[id_A]\n", "_A = LA.vector_norm(A, ord=2)\n", "\n", "#if no imput exists we just randomize the entire thing\n", "if (prompt == \"\"):\n", " id_A = -1\n", " print(\"Tokenized prompt tensor A is a random valued tensor with no ID\")\n", " R = torch.rand(768)\n", " _R = LA.vector_norm(R, ord=2)\n", " A = R*(_A/_R)\n", "\n", "#Save a copy of the tensor A\n", "id_P = id_A\n", "P = A\n", "_P = LA.vector_norm(A, ord=2)\n" ], "metadata": { "id": "YqdiF8DIz9Wu", "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title 🥢 -> 🥢🔀 Take the ID at index 1 from above result and modify it (optional)\n", "mix_with = \"\" # @param {type:'string'}\n", "mix_method = \"None\" # @param [\"None\" , \"Average\", \"Subtract\"] {allow-input: true}\n", "w = 0.5 # @param {type:\"slider\", min:0, max:1, step:0.01}\n", "\n", "#------#\n", "#If set to TRUE , this will use the output of this cell , tensor A, as the input of this cell the 2nd time we run it. Use this feature to mix many tokens into A\n", "re_iterate_tensor_A = True # @param {\"type\":\"boolean\"}\n", "if (re_iterate_tensor_A == False) :\n", " #prevent re-iterating A by reading from stored copy\n", " id_A = id_P\n", " A = P\n", " _A = _P\n", "#----#\n", "\n", "tokenizer_output = tokenizer(text = mix_with)\n", "input_ids = tokenizer_output['input_ids']\n", "id_C = input_ids[1]\n", "C = token[id_C]\n", "_C = LA.vector_norm(C, ord=2)\n", "\n", "#if no imput exists we just randomize the entire thing\n", "if (mix_with == \"\"):\n", " id_C = -1\n", " print(\"Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID\")\n", " R = torch.rand(768)\n", " _R = LA.vector_norm(R, ord=2)\n", " C = R*(_C/_R)\n", "\n", "if (mix_method == \"None\"):\n", " print(\"No operation\")\n", "\n", "if (mix_method == \"Average\"):\n", " A = w*A + (1-w)*C\n", " _A = LA.vector_norm(A, ord=2)\n", " print(\"Tokenized prompt tensor A has been recalculated as A = w*A + (1-w)*C , where C is the tokenized prompt 'mix_with' tensor C\")\n", "\n", "if (mix_method == \"Subtract\"):\n", " tmp = (A/_A) - (C/_C)\n", " _tmp = LA.vector_norm(tmp, ord=2)\n", " A = tmp*((w*_A + (1-w)*_C)/_tmp)\n", " _A = LA.vector_norm(A, ord=2)\n", " print(\"Tokenized prompt tensor A has been recalculated as A = (w*_A + (1-w)*_C) * norm(w*A - (1-w)*C) , where C is the tokenized prompt 'mix_with' tensor C\")\n", "\n", "#OPTIONAL : Add/subtract + normalize above result with another token. Leave field empty to get a random value tensor" ], "metadata": { "id": "oXbNSRSKPgRr", "collapsed": true, "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "\n", "# @title 🥢->🧾🥢 Find Similiar Tokens to ID at index 1 from above result\n", "dots = torch.zeros(NUM_TOKENS)\n", "for index in range(NUM_TOKENS):\n", " id_B = index\n", " B = token[id_B]\n", " _B = LA.vector_norm(B, ord=2)\n", " result = torch.dot(A,B)/(_A*_B)\n", " #result = absolute_value(result.item())\n", " result = result.item()\n", " dots[index] = result\n", "\n", "name_A = \"A of random type\"\n", "if (id_A>-1):\n", " name_A = vocab[id_A]\n", "\n", "name_C = \"token C of random type\"\n", "if (id_C>-1):\n", " name_C = vocab[id_C]\n", "\n", "\n", "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n", "#----#\n", "if (mix_method == \"Average\"):\n", " print(f'Calculated all cosine-similarities between the average of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n", "if (mix_method == \"Subtract\"):\n", " print(f'Calculated all cosine-similarities between the subtract of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n", "if (mix_method == \"None\"):\n", " print(f'Calculated all cosine-similarities between the token {name_A} with Id_A = {id_A} with the the rest of the {NUM_TOKENS} tokens as a 1x{sorted.shape[0]} tensor')\n", "\n", "#Produce a list id IDs that are most similiar to the prompt ID at positiion 1 based on above result" ], "metadata": { "id": "juxsvco9B0iV", "collapsed": true, "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [], "metadata": { "id": "cYYu5C5C6MHH" } }, { "cell_type": "code", "source": [ "# @title 🥢🧾 -> 🖨️ Print Result from the 'Similiar Tokens' list from above result\n", "list_size = 100 # @param {type:'number'}\n", "print_ID = False # @param {type:\"boolean\"}\n", "print_Similarity = True # @param {type:\"boolean\"}\n", "print_Name = True # @param {type:\"boolean\"}\n", "print_Divider = True # @param {type:\"boolean\"}\n", "\n", "for index in range(list_size):\n", " id = indices[index].item()\n", " if (print_Name):\n", " print(f'{vocab[id]}') # vocab item\n", " if (print_ID):\n", " print(f'ID = {id}') # IDs\n", " if (print_Similarity):\n", " print(f'similiarity = {round(sorted[index].item()*100,2)} %') # % value\n", " if (print_Divider):\n", " print('--------')\n", "\n", "#Print the sorted list from above result" ], "metadata": { "id": "YIEmLAzbHeuo", "collapsed": true, "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "\n", "# @title 🆔 Get similarity % of two token IDs\n", "id_for_token_A = 4567 # @param {type:'number'}\n", "id_for_token_B = 4343 # @param {type:'number'}\n", "\n", "similarity_str = 'The similarity between tokens A and B is ' + similarity(id_for_token_A , id_for_token_B)\n", "\n", "print(similarity_str)\n", "\n", "#Valid ID ranges for id_for_token_A / id_for_token_B are between 0 and 49407" ], "metadata": { "id": "MwmOdC9cNZty", "collapsed": true, "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "\n", "\n", "# How does this notebook work?\n", "\n", "Similiar vectors = similiar output in the SD 1.5 / SDXL / FLUX model\n", "\n", "CLIP converts the prompt text to vectors (“tensors”) , with float32 values usually ranging from -1 to 1.\n", "\n", "Dimensions are \\[ 1x768 ] tensors for SD 1.5 , and a \\[ 1x768 , 1x1024 ] tensor for SDXL and FLUX.\n", "\n", "The SD models and FLUX converts these vectors to an image.\n", "\n", "This notebook takes an input string , tokenizes it and matches the first token against the 49407 token vectors in the vocab.json : [https://huggingface.co/black-forest-labs/FLUX.1-dev/tree/main/tokenizer](https://www.google.com/url?q=https%3A%2F%2Fhuggingface.co%2Fblack-forest-labs%2FFLUX.1-dev%2Ftree%2Fmain%2Ftokenizer)\n", "\n", "It finds the “most similiar tokens” in the list. Similarity is the theta angle between the token vectors.\n", "\n", "