{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "# Load the token tensors into the Colab runtime\n",
        "import torch\n",
        "from torch import linalg as LA\n",
        "\n",
        "# Clone to, and cd into, an absolute path. Colab's working directory is\n",
        "# /content, so the relative path `content/sd_tokens` does not exist there, and\n",
        "# a relative clone would nest a second copy inside the repo when this cell is\n",
        "# re-run. On a re-run git only reports that the destination already exists.\n",
        "!git clone https://huggingface.co/datasets/codeShare/sd_tokens /content/sd_tokens\n",
        "%cd /content/sd_tokens\n",
        "\n",
        "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
        "token = torch.load('sd15_tensors.pt', map_location=device, weights_only=True)"
      ],
      "metadata": {
        "id": "Ch9puvwKH1s3"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(token[100].shape)  # shape of a single token tensor"
      ],
      "metadata": {
        "id": "S_Yh9gH_OUA1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def absolute_value(x):\n",
        "    \"\"\"Return the magnitude of the number ``x``.\"\"\"\n",
        "    return abs(x)\n",
        "\n",
        "\n",
        "def similarity(id_A, id_B):\n",
        "    \"\"\"Return the cosine similarity of two tokens as a percentage string.\n",
        "\n",
        "    Args:\n",
        "        id_A: Index into ``token`` of the first token.\n",
        "        id_B: Index into ``token`` of the second token.\n",
        "\n",
        "    Returns:\n",
        "        A string such as ``'4.001 %'``: the absolute value of the cosine\n",
        "        similarity, scaled to percent and rounded to 3 decimals. The sign\n",
        "        is discarded, so opposite vectors also report 100 %.\n",
        "    \"\"\"\n",
        "    # Tensors (torch.dot below requires both to be 1-D)\n",
        "    A = token[id_A]\n",
        "    B = token[id_B]\n",
        "\n",
        "    # Tensor vector length (2nd order, i.e. (a^2 + b^2 + ...)^(1/2))\n",
        "    _A = LA.vector_norm(A, ord=2)\n",
        "    _B = LA.vector_norm(B, ord=2)\n",
        "\n",
        "    # Cosine of the angle between A and B\n",
        "    result = torch.dot(A, B) / (_A * _B)\n",
        "    similarity_pcnt = absolute_value(result.item() * 100)\n",
        "\n",
        "    similarity_pcnt_aprox = round(similarity_pcnt, 3)\n",
        "\n",
        "    return f'{similarity_pcnt_aprox} %'"
      ],
      "metadata": {
        "id": "fxquCxFaUxAZ"
      },
      "execution_count": 35,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Valid IDs for `id_for_token_A` / `id_for_token_B` are between 0 and 49407."
      ],
      "metadata": {
        "id": "kX72bAuhOtlT"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "id_for_token_A = 500 # @param {type:'number'}\n",
        "id_for_token_B = 4343 # @param {type:'number'}\n",
        "\n",
        "# Keep the result under its own name: rebinding `similarity` here would\n",
        "# replace the function with a string and make a second run of this cell fail.\n",
        "similarity_text = similarity(id_for_token_A, id_for_token_B)\n",
        "\n",
        "print(f'The similarity between tokens A and B is {similarity_text}')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "MwmOdC9cNZty",
        "outputId": "e75c4987-9d13-4ec7-ca36-775b8dbac707"
      },
      "execution_count": 36,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "The similarity between tokens A and B is 4.001 %\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "oJC12JgJUPrB"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}