{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [
"This notebook is a Stable Diffusion tool that lets you find similar tokens in the SD 1.5 vocab.json that you can use for text-to-image generation. Try this free online SD 1.5 generator with the results: https://perchance.org/fusion-ai-image-generator\n",
"\n",
"Scroll to the bottom of the notebook to see the guide for how this works." ], "metadata": { "id": "L7JTcbOdBPfh" } }, { "cell_type": "code", "source": [
"# @title ✳️ Load/initialize values\n",
"# Load the tokens into the colab\n",
"!git clone https://huggingface.co/datasets/codeShare/sd_tokens\n",
"import torch\n",
"from torch import linalg as LA\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"%cd /content/sd_tokens\n",
"token = torch.load('sd15_tensors.pt', map_location=device, weights_only=True)\n",
"#-----#\n",
"\n",
"#Import the vocab.json\n",
"import json\n",
"import pandas as pd\n",
"with open('vocab.json', 'r') as f:\n",
"    data = json.load(f)\n",
"\n",
"_df = pd.DataFrame({'count': data})['count']\n",
"\n",
"# Invert the mapping: vocab.json maps token text -> ID, the cells below look tokens up by ID\n",
"vocab = {\n",
"    value: key for key, value in _df.items()\n",
"}\n",
"#-----#\n",
"\n",
"# Define functions/constants\n",
"NUM_TOKENS = 49407\n",
"\n",
"def absolute_value(x):\n",
"    \"\"\"Return the magnitude |x| of a number (or 0-dim tensor).\"\"\"\n",
"    return abs(x)\n",
"\n",
"\n",
"def token_similarity(A, B):\n",
"    \"\"\"Return the cosine similarity of the 1-D tensors A and B as a percentage string, e.g. '12.345 %'.\"\"\"\n",
"\n",
"    #Vector length#\n",
"    _A = LA.vector_norm(A, ord=2)\n",
"    _B = LA.vector_norm(B, ord=2)\n",
"\n",
"    #----#\n",
"    result = torch.dot(A,B)/(_A*_B)\n",
"    #similarity_pcnt = absolute_value(result.item()*100)\n",
"    similarity_pcnt = result.item()*100\n",
"    similarity_pcnt_aprox = round(similarity_pcnt, 3)\n",
"    result = f'{similarity_pcnt_aprox} %'\n",
"    return result\n",
"\n",
"\n",
"def similarity(id_A , id_B):\n",
"    \"\"\"Return token_similarity() for the two vocab entries with IDs id_A and id_B.\"\"\"\n",
"    #Tensors\n",
"    A = token[id_A]\n",
"    B = token[id_B]\n",
"    return token_similarity(A, B)\n",
"#----#\n",
"\n",
"#print(vocab[8922]) #the vocab item for ID 8922\n",
"#print(token[8922].shape) #dimension of the token\n",
"\n",
"mix_with = \"\"\n",
"mix_method = \"None\"\n",
"\n",
"#-------------#\n",
"# UNUSED\n",
"\n",
"# Shared helper: format the indices of the `count` most extreme values of a 1-D tensor as \"{i0,i1,...}\"\n",
"def _extreme_indices (A, descending, count=10):\n",
"    _, order = torch.sort(A, dim=0, descending=descending)\n",
"    # Slicing (instead of indexing 0..9) also works for tensors with fewer than `count` elements\n",
"    return \"{\" + \",\".join(str(position) for position in order[:count].tolist()) + \"}\"\n",
"\n",
"# Get the indices of the 10 lowest values from a tensor as a string\n",
"def get_valleys (A):\n",
"    return _extreme_indices(A, descending=False)\n",
"\n",
"# Get the indices of the 10 highest values from a tensor as a string\n",
"def get_peaks (A):\n",
"    return _extreme_indices(A, descending=True)" ], "metadata": { "id": "Ch9puvwKH1s3", "collapsed": true, "cellView": "form" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [
"# @title ⚡ Get similar tokens\n",
"import torch\n",
"from transformers import AutoTokenizer\n",
"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
"\n",
"# @markdown Write name of token to match against\n",
"prompt= \"banana\" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
"# @markdown (optional) Mix the token with something else\n",
"mix_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for random value token\"}\n",
"mix_method = \"None\" # @param [\"None\" , \"Average\", \"Subtract\"] {allow-input: true}\n",
"w = 0.5 # @param {type:\"slider\", min:0, max:1, step:0.01}\n",
"# @markdown Limit char size of included token\n",
"min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
"char_range = 
5 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
"\n",
"# Only the first real token of each text is used: input_ids[0] is the start-of-text marker\n",
"tokenizer_output = tokenizer(text = prompt)\n",
"input_ids = tokenizer_output['input_ids']\n",
"id_A = input_ids[1]\n",
"A = torch.tensor(token[id_A])\n",
"A = A/A.norm(p=2, dim=-1, keepdim=True)\n",
"#-----#\n",
"tokenizer_output = tokenizer(text = mix_with)\n",
"input_ids = tokenizer_output['input_ids']\n",
"id_C = input_ids[1]\n",
"C = torch.tensor(token[id_C])\n",
"C = C/C.norm(p=2, dim=-1, keepdim=True)\n",
"#-----#\n",
"print(input_ids)\n",
"#-----#\n",
"\n",
"#if no input exists we just randomize the entire thing\n",
"if (prompt == \"\"):\n",
"    id_A = -1\n",
"    print(\"Tokenized prompt tensor A is a random valued tensor with no ID\")\n",
"    # Create the random vector on the same device/dtype as the token vectors so torch.dot() below cannot fail on a device mismatch\n",
"    R = torch.rand(A.shape, dtype=A.dtype, device=A.device)\n",
"    R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
"    A = R\n",
"\n",
"#if no input exists we just randomize the entire thing\n",
"if (mix_with == \"\"):\n",
"    id_C = -1\n",
"    print(\"Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID\")\n",
"    R = torch.rand(A.shape, dtype=A.dtype, device=A.device)\n",
"    R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
"    C = R\n",
"\n",
"# Compare A and C only after the random fallbacks, so the printed value describes the vectors that are actually used\n",
"sim_AC = torch.dot(A,C)\n",
"#-----#\n",
"\n",
"name_A = \"A of random type\"\n",
"if (id_A>-1):\n",
"    name_A = vocab[id_A]\n",
"\n",
"name_C = \"token C of random type\"\n",
"if (id_C>-1):\n",
"    name_C = vocab[id_C]\n",
"\n",
"print(f\"The similarity between A '{name_A}' and C '{name_C}' is {round(sim_AC.item()*100,2)} %\")\n",
"\n",
"if (mix_method == \"None\"):\n",
"    print(\"No operation\")\n",
"\n",
"if (mix_method == \"Average\"):\n",
"    A = w*A + (1-w)*C\n",
"    # Re-normalize: a weighted sum of unit vectors is not unit length, and the dot products computed later are only cosines for unit A\n",
"    A = A/A.norm(p=2, dim=-1, keepdim=True)\n",
"    print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = w*A + (1-w)*C , where C is '{name_C}' token , for w = {w} \")\n",
"\n",
"if (mix_method == \"Subtract\"):\n",
"    tmp = w*A - (1-w)*C\n",
"    tmp = tmp/tmp.norm(p=2, dim=-1, keepdim=True)\n",
"    A = tmp\n",
"    #//---//\n",
"    print(f\"Tokenized prompt tensor A '{name_A}' 
token has been recalculated as A = _A*norm(w*A - (1-w)*C) , where C is '{name_C}' token , for w = {w} \")\n",
"\n",
"#OPTIONAL : Add/subtract + normalize above result with another token. Leave field empty to get a random value tensor\n",
"\n",
"# Stack every vocab vector into one (NUM_TOKENS x dim) matrix so all similarities come from a single matmul\n",
"# instead of NUM_TOKENS separate torch.dot() calls. Row-wise stacking works whether 'token' is a 2-D tensor\n",
"# or a sequence of 1-D tensors.\n",
"B_all = torch.stack([torch.as_tensor(token[id_B]) for id_B in range(NUM_TOKENS)]).to(device=A.device, dtype=A.dtype)\n",
"B_all = B_all/B_all.norm(p=2, dim=-1, keepdim=True)\n",
"dots = torch.matmul(B_all, A)\n",
"\n",
"\n",
"sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
"#----#\n",
"if (mix_method == \"Average\"):\n",
"    print(f'Calculated all cosine-similarities between the average of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
"if (mix_method == \"Subtract\"):\n",
"    print(f'Calculated all cosine-similarities between the subtract of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
"if (mix_method == \"None\"):\n",
"    print(f'Calculated all cosine-similarities between the token {name_A} with Id_A = {id_A} with the the rest of the {NUM_TOKENS} tokens as a 1x{sorted.shape[0]} tensor')\n",
"\n",
"#Produce a list of IDs that are most similar to the prompt ID at position 1 based on above result\n",
"\n",
"# @markdown Set print options\n",
"list_size = 100 # @param {type:'number'}\n",
"print_ID = False # @param {type:\"boolean\"}\n",
"print_Similarity = True # @param {type:\"boolean\"}\n",
"print_Name = True # @param {type:\"boolean\"}\n",
"print_Divider = True # @param {type:\"boolean\"}\n",
"\n",
"\n",
"if (print_Divider):\n",
"    print('//---//')\n",
"\n",
"print('')\n",
"print('Here is the result : ')\n",
"print('')\n",
"\n",
"# Never read past the end of the sorted tensor if list_size is set larger than the vocabulary\n",
"for index in range(min(list_size, NUM_TOKENS)):\n",
"    id = indices[index].item()\n",
"    if (print_Name):\n",
"        print(f'{vocab[id]}') # vocab item\n",
"    if (print_ID):\n",
"        print(f'ID = {id}') # IDs\n",
"    if (print_Similarity):\n",
"        print(f'similiarity = 
{round(sorted[index].item()*100,2)} %')\n",
"    if (print_Divider):\n",
"        print('--------')\n",
"\n",
"#Print the sorted list from above result\n",
"\n",
"#The prompt will be enclosed with the <|start-of-text|> and <|end-of-text|> tokens, which is why output will be [49406, ... , 49407].\n",
"\n",
"#You can leave the 'prompt' field empty to get a random value tensor. Since the tensor is random value, it will not correspond to any tensor in the vocab.json list , and thus it will have no ID." ], "metadata": { "id": "iWeFnT1gAx6A" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [
"The image interrogator below appends CLIP tokens to either end of the 'must_contain' text , and seeks to maximize similarity with the image encoding.\n",
"\n",
"It takes a long while to check all the tokens (too long!) so this cell only samples a range of the 49K available tokens.\n",
"\n",
"You can run this cell, then paste the result into the 'must_contain' box , and then run the cell again.\n",
"\n" ], "metadata": { "id": "IUCuV9RtQpBn" } }, { "cell_type": "code", "source": [
"# @title 🪐🖼️ -> 📝 Slow Recursive Token Image interrogator\n",
"\n",
"# @markdown # What do you want to mimic?\n",
"use = '🖼️image_encoding from image' # @param ['📝text_encoding from prompt', '🖼️image_encoding from image']\n",
"# @markdown --------------------------\n",
"use_token_padding = True # param {type:\"boolean\"} <---- Enabled by default\n",
"prompt = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
"\n",
"prompt_A = prompt\n",
"\n",
"from google.colab import files\n",
"def upload_files():\n",
"    \"\"\"Open the Colab upload dialog, write every uploaded file into the current working directory and return the list of uploaded file names.\"\"\"\n",
"    uploaded = files.upload()\n",
"    for k, v in uploaded.items():\n",
"        # 'with' closes (and flushes) the handle, so the file is complete on disk before it is read back\n",
"        with open(k, 'wb') as out_file:\n",
"            out_file.write(v)\n",
"    return list(uploaded.keys())\n",
"#Get image\n",
"# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
"image_url = \"http://images.cocodataset.org/val2017/000000039769.jpg\" # 
@param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload (scroll down to see it)\"}\n",
"\n",
"\n",
"colab_image_path = \"\" # @param {\"type\":\"string\",\"placeholder\": \"eval. as '/content/sd_tokens/' + **your input**\"}\n",
"\n",
"# @markdown --------------------------\n",
"from PIL import Image\n",
"import requests\n",
"image_A = \"\"\n",
"\n",
"#----#\n",
"\n",
"if(use == '🖼️image_encoding from image'):\n",
"    if image_url == \"\":\n",
"        import cv2\n",
"        from google.colab.patches import cv2_imshow\n",
"        # Open the image.\n",
"        # cv2.imread() returns pixels in BGR order; convert to RGB, which is the channel order the CLIP processor expects\n",
"        if colab_image_path == \"\":\n",
"            keys = upload_files()\n",
"            for key in keys:\n",
"                image_A = cv2.cvtColor(cv2.imread(\"/content/sd_tokens/\" + key), cv2.COLOR_BGR2RGB)\n",
"                colab_image_path = \"/content/sd_tokens/\" + key\n",
"        else:\n",
"            image_A = cv2.cvtColor(cv2.imread(\"/content/sd_tokens/\" + colab_image_path), cv2.COLOR_BGR2RGB)\n",
"    else:\n",
"        image_A = Image.open(requests.get(image_url, stream=True).raw)\n",
"#------#\n",
"\n",
"from transformers import AutoTokenizer\n",
"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
"from transformers import CLIPProcessor, CLIPModel\n",
"processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
"model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
"\n",
"\n",
"if(use == '🖼️image_encoding from image'):\n",
"    # Get image features\n",
"    inputs = processor(images=image_A, return_tensors=\"pt\")\n",
"    image_features = model.get_image_features(**inputs)\n",
"    image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)\n",
"    name_A = \"the image\"\n",
"#-----#\n",
"\n",
"\n",
"if(use == '📝text_encoding from prompt'):\n",
"    # Get text features\n",
"    inputs = tokenizer(text = prompt, padding=True, return_tensors=\"pt\")\n",
"    text_features_A = model.get_text_features(**inputs)\n",
"    name_A = prompt\n",
"#-----#\n",
"\n",
"\n",
"# @markdown # The output...\n",
"must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
"must_contain = \"banana \" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
"must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
"token_B = must_contain\n",
"\n",
"# @markdown -----\n",
"\n",
"# @markdown # Use a range of tokens from the vocab.json (slow method)\n",
"start_search_at_ID = 27700 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
"search_range = 100 # @param {type:\"slider\", min:100, max: 2000, step:0}\n",
"restrictions = 'None' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
"\n",
"#markdown Limit char size of included token <----- Disabled\n",
"min_char_size = 0 #param {type:\"slider\", min:0, max: 20, step:1}\n",
"char_range = 50 #param {type:\"slider\", min:0, max: 20, step:1}\n",
"\n",
"\n",
"# markdown # ...or paste prompt items\n",
"# markdown Format must be {item1|item2|...}. You can acquire prompt items using the Randomizer in the fusion gen: https://perchance.org/fusion-ai-image-generator\n",
"_enable = False # param {\"type\":\"boolean\"}\n",
"prompt_items = \"\" # param {\"type\":\"string\",\"placeholder\":\"{item1|item2|...}\"}\n",
"#-----#\n",
"name_B = must_contain\n",
"#-----#\n",
"\n",
"START = start_search_at_ID\n",
"RANGE = min(search_range , 49407 - start_search_at_ID)\n",
"\n",
"# dots[i] / is_BC[i] describe the token with ID START + i\n",
"dots = torch.zeros(RANGE)\n",
"is_BC = torch.zeros(RANGE)\n",
"\n",
"import re\n",
"\n",
"for index in range(RANGE):\n",
"    id_C = START + index\n",
"    name_C = vocab[id_C]\n",
"    is_Prefix = 0\n",
"\n",
"\n",
"    #Skip if non-AZ characters are found\n",
"    # Strip the '</w>' end-of-word marker first, otherwise its '<', '/' and '>' would count as non-word characters.\n",
"    # Python regexes take no JavaScript-style '/g' suffix: \"\\W/g\" only matched a literal '/g' and filtered nothing.\n",
"    if re.search(r\"\\W\" , name_C.replace('</w>', '')):\n",
"        continue\n",
"\n",
"\n",
"    # Decide if we should process prefix/suffix tokens\n",
"    # A vocab entry without the '</w>' marker is a word prefix, one with it ends a word\n",
"    if name_C.find('</w>')<=-1:\n",
"        is_Prefix = 1\n",
"        if restrictions != \"Prefix only\":\n",
"            continue\n",
"    else:\n",
"        if restrictions == \"Prefix only\":\n",
"            continue\n",
"    #-----#\n",
"\n",
"    # Decide 
if char-size is within range\n",
"    if len(name_C) < min_char_size:\n",
"        continue\n",
"    if len(name_C) > min_char_size + char_range:\n",
"        continue\n",
"    #-----#\n",
"    # Candidate text with token C placed ahead of the must_contain text B\n",
"    name_CB = must_start_with + name_C + name_B + must_end_with\n",
"    if is_Prefix>0:\n",
"        name_CB = must_start_with + ' ' + name_C.strip() + '-' + name_B.strip() + ' ' + must_end_with\n",
"    #-----#\n",
"\n",
"    if(use == '🖼️image_encoding from image'):\n",
"        ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
"        text_features = model.get_text_features(**ids_CB)\n",
"        text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
"        logit_scale = model.logit_scale.exp()\n",
"        # (the former stand-alone torch.matmul(...) statement was removed: its result was never stored or used)\n",
"        sim_CB = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
"    #-----#\n",
"\n",
"    if(use == '📝text_encoding from prompt'):\n",
"        ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
"        text_features = model.get_text_features(**ids_CB)\n",
"        text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
"        sim_CB = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
"    #-----#\n",
"\n",
"\n",
"\n",
"    #-----#\n",
"    # Prefix tokens are only ever tried ahead of B, so the B+C variant below is skipped for them\n",
"    if restrictions == \"Prefix only\":\n",
"        result = sim_CB\n",
"        result = result.item()\n",
"        dots[index] = result\n",
"        continue\n",
"    #-----#\n",
"\n",
"    if(use == '🖼️image_encoding from image'):\n",
"        # Candidate text with token C placed behind the must_contain text B\n",
"        name_BC = must_start_with + name_B + name_C + must_end_with\n",
"        ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
"        text_features = model.get_text_features(**ids_BC)\n",
"        text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
"        logit_scale = model.logit_scale.exp()\n",
"        sim_BC = torch.nn.functional.cosine_similarity(text_features, 
image_features) * logit_scale\n",
"    #-----#\n",
"\n",
"    if(use == '📝text_encoding from prompt'):\n",
"        name_BC = must_start_with + name_B + name_C + must_end_with\n",
"        ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
"        text_features = model.get_text_features(**ids_BC)\n",
"        text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
"        sim_BC = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
"    #-----#\n",
"\n",
"    # Keep the better of the two placements and remember which one won\n",
"    result = sim_CB\n",
"    if(sim_BC > sim_CB):\n",
"        is_BC[index] = 1\n",
"        result = sim_BC\n",
"\n",
"    #result = absolute_value(result.item())\n",
"    result = result.item()\n",
"    dots[index] = result\n",
"#----#\n",
"\n",
"sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
"\n",
"\n",
"# @markdown ----------\n",
"# @markdown # Print options\n",
"list_size = 100 # @param {type:'number'}\n",
"print_ID = False # @param {type:\"boolean\"}\n",
"print_Similarity = True # @param {type:\"boolean\"}\n",
"print_Name = True # @param {type:\"boolean\"}\n",
"print_Divider = True # @param {type:\"boolean\"}\n",
"\n",
"\n",
"if (print_Divider):\n",
"    print('//---//')\n",
"\n",
"print('')\n",
"print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the text_encoding for {prompt_A} : ')\n",
"print('')\n",
"#----#\n",
"aheads = \"{\"\n",
"trails = \"{\"\n",
"tmp = \"\"\n",
"#----#\n",
"max_sim_ahead = 0\n",
"max_sim_trail = 0\n",
"sim = 0\n",
"max_name_ahead = ''\n",
"max_name_trail = ''\n",
"#----#\n",
"for index in range(min(list_size,RANGE)):\n",
"    id = START + indices[index].item()\n",
"    name = vocab[id]\n",
"    #-----#\n",
"    # Prefix tokens (no '</w>' marker) are joined with '-', word-ending tokens get a space instead of the marker\n",
"    if (name.find('</w>')<=-1):\n",
"        name = name + '-'\n",
"    else:\n",
"        name = name.replace('</w>', ' ')\n",
"    # is_BC is stored per search position, so it must be read through the sort permutation, not by rank\n",
"    if(is_BC[indices[index]]>0):\n",
"        trails = trails + name + \"|\"\n",
"    else:\n",
"        aheads = aheads + name + \"|\"\n",
"    #----#\n",
"    sim = sorted[index].item()\n",
"\n",
"    # Tokens that did NOT score best in trailing position compete for the 'ahead' maximum; the rest for the 'trail' maximum\n",
"    if(is_BC[indices[index]]<=0):\n",
"        if 
sim>max_sim_ahead:\n",
"            max_sim_ahead = sim\n",
"            max_name_ahead = name\n",
"    else:\n",
"        if sim>max_sim_trail:\n",
"            max_sim_trail = sim\n",
"            max_name_trail = name\n",
"\n",
"#------#\n",
"# Close the {a|b|c} lists: the '&&&&' sentinel turns the final '|' into '}' and an empty list into ''\n",
"trails = (trails + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
"aheads = (aheads + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
"# (removed 'max_sim_ahead=max_sim_trail', which overwrote the ahead maximum with the trail maximum before printing)\n",
"#-----#\n",
"print(f\"place these items ahead of prompt : {aheads}\")\n",
"print(\"\")\n",
"print(f\"place these items behind the prompt : {trails}\")\n",
"print(\"\")\n",
"print(f\"max_similarity = {max_sim_ahead} % when using '{max_name_ahead + must_contain}' \")\n",
"print(\"\")\n",
"print(f\"max_similarity = {max_sim_trail} % when using '{must_contain + max_name_trail}' \")\n",
"#-----#\n",
"#STEP 2\n",
"# Score the four combinations of the best 'ahead' and best 'trail' token around must_contain\n",
"import random\n",
"\n",
"names = {}\n",
"\n",
"NUM_PERMUTATIONS = 4 # 0 1 2 3\n",
"dots = torch.zeros(NUM_PERMUTATIONS)\n",
"for index in range(NUM_PERMUTATIONS):\n",
"    name = must_start_with\n",
"    if index == 0 : name = name + must_contain\n",
"    if index == 1 : name = name + max_name_ahead + must_contain\n",
"    if index == 2 : name = name + must_contain + max_name_trail\n",
"    if index == 3 : name = name + max_name_ahead + must_contain + max_name_trail\n",
"    name = name + must_end_with\n",
"    #----#\n",
"    ids = processor.tokenizer(text=name, padding=use_token_padding, return_tensors=\"pt\")\n",
"\n",
"    if(use == '🖼️image_encoding from image'):\n",
"        text_features = model.get_text_features(**ids)\n",
"        text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
"        logit_scale = model.logit_scale.exp()\n",
"        # (the former stand-alone torch.matmul(...) statement was removed: its result was never stored or used)\n",
"        sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
"    #-----#\n",
"\n",
"    if(use == '📝text_encoding from prompt'):\n",
"        text_features = model.get_text_features(**ids)\n",
"        text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
"        sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
"    #-----#\n",
"\n",
"\n",
"    # Store a plain float so the score tensor does not keep the model's autograd graph alive\n",
"    dots[index] = sim.item()\n",
"    names[index] = name\n",
"\n",
"\n",
"#------#\n",
"\n",
"sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
"\n",
"for index in range(NUM_PERMUTATIONS):\n",
"    print(names[indices[index].item()])\n",
"    print(f'similiarity = {round(sorted[index].item(),2)} %')\n",
"    print('------')" ], "metadata": { "collapsed": true, "id": "fi0jRruI0-tu" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [
"# @title 💫 Compare Text encodings\n",
"prompt_A = \"banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
"prompt_B = \"bike \" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
"use_token_padding = True # param {type:\"boolean\"} <----- Enabled by default\n",
"#-----#\n",
"from transformers import AutoTokenizer\n",
"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\",\n",
"clean_up_tokenization_spaces = False)\n",
"#-----#\n",
"from transformers import CLIPProcessor, CLIPModel\n",
"processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
"model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
"#----#\n",
"inputs = tokenizer(text = prompt_A, padding=True, return_tensors=\"pt\")\n",
"text_features_A = model.get_text_features(**inputs)\n",
"text_features_A = text_features_A / text_features_A.norm(p=2, dim=-1, keepdim=True)\n",
"name_A = prompt_A\n",
"#----#\n",
"inputs = tokenizer(text = prompt_B, padding=True, return_tensors=\"pt\")\n",
"text_features_B = model.get_text_features(**inputs)\n",
"text_features_B = text_features_B / text_features_B.norm(p=2, dim=-1, keepdim=True)\n",
"name_B = prompt_B\n",
"#----#\n",
"import torch\n",
"sim_AB = 
torch.nn.functional.cosine_similarity(text_features_A, text_features_B)\n", "#----#\n", "print(f'The similarity between the text_encoding for A:\"{prompt_A}\" and B: \"{prompt_B}\" is {round(sim_AB.item()*100,2)} %')" ], "metadata": { "id": "QQOjh5BvnG8M", "collapsed": true }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "You can write a URL or upload a file locally from your device to use as reference. The image will be saved in the 'sd_tokens' folder. Note that the 'sd_tokens' folder will be deleted upon exiting this runtime." ], "metadata": { "id": "hyK423TQCRup" } }, { "cell_type": "markdown", "source": [ "\n", "\n", "# How does this notebook work?\n", "\n", "Similar vectors = similar output in the SD 1.5 / SDXL / FLUX model\n", "\n", "CLIP converts the prompt text to vectors (“tensors”) , with float32 values usually ranging from -1 to 1.\n", "\n", "Dimensions are \\[ 1x768 ] tensors for SD 1.5 , and a \\[ 1x768 , 1x1024 ] tensor for SDXL and FLUX.\n", "\n", "The SD models and FLUX convert these vectors to an image.\n", "\n", "This notebook takes an input string , tokenizes it and matches the first token against the 49407 token vectors in the vocab.json : [https://huggingface.co/black-forest-labs/FLUX.1-dev/tree/main/tokenizer](https://www.google.com/url?q=https%3A%2F%2Fhuggingface.co%2Fblack-forest-labs%2FFLUX.1-dev%2Ftree%2Fmain%2Ftokenizer)\n", "\n", "It finds the “most similar tokens” in the list. Similarity is measured as the cosine of the angle theta between the token vectors.\n", "\n", "