File size: 3,049 Bytes
44eca84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"# Load the tokens into the colab\n",
"!git clone https://huggingface.co/datasets/codeShare/sd_tokens\n",
"import torch\n",
"from torch import linalg as LA\n",
"\n",
"# Use the GPU when one is available, otherwise fall back to the CPU.\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"\n",
"# `git clone` puts the repository in ./sd_tokens, so the tensor file is read\n",
"# from that directory instead of the working directory.\n",
"# NOTE(review): assumes sd15_tensors.pt sits at the repository root - confirm.\n",
"token = torch.load('sd_tokens/sd15_tensors.pt', map_location=device, weights_only=True)"
],
"metadata": {
"id": "Ch9puvwKH1s3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Show the dimensions of a single token tensor\n",
"print(token[100].size())"
],
"metadata": {
"id": "S_Yh9gH_OUA1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def absolute_value(x):\n",
"    '''Return the magnitude of x.\n",
"\n",
"    Uses the built-in abs() so that a negative zero comes back as 0.0\n",
"    (max(x, -x) would hand -0.0 back unchanged).\n",
"    '''\n",
"    return abs(x)\n",
"\n",
"def similarity(id_A, id_B):\n",
"    '''Return the absolute cosine similarity of two tokens as a percentage string.\n",
"\n",
"    id_A, id_B: indices into the global `token` collection.\n",
"    The value is rounded to 3 decimals and formatted like '4.001 %'.\n",
"    '''\n",
"    vec_a, vec_b = token[id_A], token[id_B]\n",
"\n",
"    # Product of the two Euclidean (2nd order) lengths, each (a^2 + b^2 + ...)^(1/2)\n",
"    norm_product = LA.vector_norm(vec_a, ord=2) * LA.vector_norm(vec_b, ord=2)\n",
"\n",
"    # Normalised dot product, scaled to percent\n",
"    cosine_pcnt = (torch.dot(vec_a, vec_b) / norm_product).item() * 100\n",
"\n",
"    similarity_pcnt_aprox = round(absolute_value(cosine_pcnt), 3)\n",
"    return f'{similarity_pcnt_aprox} %'"
],
"metadata": {
"id": "fxquCxFaUxAZ"
},
"execution_count": 35,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Valid values for id_for_token_A and id_for_token_B are integers from 0 to 49407."
],
"metadata": {
"id": "kX72bAuhOtlT"
}
},
{
"cell_type": "code",
"source": [
"id_for_token_A = 500 # @param {type:'number'}\n",
"id_for_token_B = 4343 # @param {type:'number'}\n",
"\n",
"# Store the result under its own name: assigning it to `similarity` would\n",
"# replace the function with a string and break this cell when it is run again.\n",
"similarity_result = similarity(id_for_token_A , id_for_token_B)\n",
"\n",
"print(f'The similarity between tokens A and B is {similarity_result}')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MwmOdC9cNZty",
"outputId": "e75c4987-9d13-4ec7-ca36-775b8dbac707"
},
"execution_count": 36,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The similarity between tokens A and B is 4.001 %\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "oJC12JgJUPrB"
},
"execution_count": null,
"outputs": []
}
]
} |