codeShare committed on
Commit a48155a
1 Parent(s): 73c73f4

Upload sd_token_similarity_calculator.ipynb

Files changed (1):
  1. sd_token_similarity_calculator.ipynb +121 -222

sd_token_similarity_calculator.ipynb CHANGED
@@ -116,28 +116,10 @@
  "metadata": {
  "id": "Ch9puvwKH1s3",
  "collapsed": true,
- "cellView": "form",
- "outputId": "8101e515-49f2-41d4-b03b-4195d56f50de",
- "colab": {
- "base_uri": "https://localhost:8080/"
- }
  },
- "execution_count": 1,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Cloning into 'sd_tokens'...\n",
- "remote: Enumerating objects: 10, done.\u001b[K\n",
- "remote: Counting objects: 100% (7/7), done.\u001b[K\n",
- "remote: Compressing objects: 100% (7/7), done.\u001b[K\n",
- "remote: Total 10 (delta 1), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
- "Unpacking objects: 100% (10/10), 306.93 KiB | 1.19 MiB/s, done.\n",
- "/content/sd_tokens\n"
- ]
- }
- ]
  },
  {
  "cell_type": "code",
@@ -306,7 +288,16 @@
  {
  "cell_type": "code",
  "source": [
- "# @title 🪐🖼️ -> 📝 Image to prompt : Create suggestions of things to add to prompt to match image\n",
  "from google.colab import files\n",
  "def upload_files():\n",
  " from google.colab import files\n",
@@ -316,61 +307,80 @@
  " return list(uploaded.keys())\n",
  "#Get image\n",
  "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
- "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload (scroll down to see it)\"}\n",
  "\n",
- "colab_image_path = \"\" # @param {\"type\":\"string\",\"placeholder\":\"(optional) Write colab image path to load from\"}\n",
  "from PIL import Image\n",
  "import requests\n",
  "image_A = \"\"\n",
  "\n",
  "#----#\n",
- "if url == \"\":\n",
- " import cv2\n",
- " from google.colab.patches import cv2_imshow\n",
- " # Open the image.\n",
- " if colab_image_path == \"\":\n",
- " keys = upload_files()\n",
- " for key in keys:\n",
- " image_A = cv2.imread(\"/content/sd_tokens/\" + key)\n",
- " colab_image_path = \"/content/sd_tokens/\" + key\n",
- " else:\n",
- " image_A = cv2.imread(colab_image_path)\n",
- "else:\n",
- " image_A = Image.open(requests.get(url, stream=True).raw)\n",
  "\n",
  "\n",
- "# Get image features\n",
  "from transformers import CLIPProcessor, CLIPModel\n",
  "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
  "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
- "inputs = processor(images=image_A, return_tensors=\"pt\")\n",
- "image_features = model.get_image_features(**inputs)\n",
- "image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)\n",
- "prompt_A = \"the image\"\n",
- "name_A = prompt_A\n",
  "#-----#\n",
  "\n",
- "# @markdown Set conditions for the output\n",
  "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
  "must_contain = \"banana \" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
  "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
  "token_B = must_contain\n",
  "\n",
- "# @markdown Limit the search\n",
- "use_token_padding = True # @param {type:\"boolean\"}\n",
  "start_search_at_ID = 27700 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
- "search_range = 288 # @param {type:\"slider\", min:100, max: 2000, step:0}\n",
  "restrictions = 'None' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
  "\n",
- "# @markdown Limit char size of included token\n",
- "min_char_size = 3 # @param {type:\"slider\", min:0, max: 20, step:1}\n",
- "char_range = 14 # @param {type:\"slider\", min:0, max: 20, step:1}\n",
  "\n",
- "#Tokenize input B\n",
- "from transformers import AutoTokenizer\n",
- "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
- "tokenizer_output = tokenizer(text = token_B)\n",
- "input_ids = tokenizer_output['input_ids']\n",
  "#-----#\n",
  "name_B = must_contain\n",
  "#-----#\n",
@@ -412,17 +422,29 @@
  " if len(name_C) > min_char_size + char_range:\n",
  " continue\n",
  " #-----#\n",
- "\n",
  " name_CB = must_start_with + name_C + name_B + must_end_with\n",
  " if is_Prefix>0:\n",
  " name_CB = must_start_with + ' ' + name_C.strip() + '-' + name_B.strip() + ' ' + must_end_with\n",
  " #-----#\n",
- " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
- " text_features = model.get_text_features(**ids_CB)\n",
- " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
- " logit_scale = model.logit_scale.exp()\n",
- " torch.matmul(text_features, image_features.t()) * logit_scale\n",
- " sim_CB = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
  " #-----#\n",
  " if restrictions == \"Prefix only\":\n",
  " result = sim_CB\n",
@@ -430,13 +452,23 @@
  " dots[index] = result\n",
  " continue\n",
  " #-----#\n",
- " name_BC = must_start_with + name_B + name_C + must_end_with\n",
- " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
- " text_features = model.get_text_features(**ids_BC)\n",
- " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
- " logit_scale = model.logit_scale.exp()\n",
- " torch.matmul(text_features, image_features.t()) * logit_scale\n",
- " sim_BC = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
  " #-----#\n",
  "\n",
  " result = sim_CB\n",
@@ -451,7 +483,9 @@
  "\n",
  "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
  "\n",
- "# @markdown Print options\n",
  "list_size = 100 # @param {type:'number'}\n",
  "print_ID = False # @param {type:\"boolean\"}\n",
  "print_Similarity = True # @param {type:\"boolean\"}\n",
@@ -531,13 +565,25 @@
  " #----#\n",
  " ids = processor.tokenizer(text=name, padding=use_token_padding, return_tensors=\"pt\")\n",
  "\n",
- " text_features = model.get_text_features(**ids)\n",
- " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
- " logit_scale = model.logit_scale.exp()\n",
- " torch.matmul(text_features, image_features.t()) * logit_scale\n",
- " sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
  " dots[index] = sim\n",
  " names[index] = name\n",
  "#------#\n",
  "\n",
  "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
@@ -604,153 +650,6 @@
  "id": "hyK423TQCRup"
  }
  },
- {
- "cell_type": "code",
- "source": [
- "# @title 🪐📝 Prompt to prompt : Add single token to existing prompt to match another prompt\n",
- "# @markdown Write a text to match against...\n",
- "prompt_A = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
- "\n",
- "# @markdown Set conditions for the output\n",
- "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
- "must_contain = \"yellow\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
- "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
- "token_B = must_contain\n",
- "\n",
- "# @markdown Limit the search\n",
- "use_token_padding = True # @param {type:\"boolean\"}\n",
- "start_search_at_ID = 12500 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
- "search_range = 500 # @param {type:\"slider\", min:0, max: 2000, step:100}\n",
- "restrictions = 'Suffix only' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
- "\n",
- "# @markdown Limit char size of included token\n",
- "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
- "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
- "\n",
- "#Tokenize input B\n",
- "from transformers import AutoTokenizer\n",
- "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
- "tokenizer_output = tokenizer(text = token_B)\n",
- "input_ids = tokenizer_output['input_ids']\n",
- "#-----#\n",
- "name_B = must_contain\n",
- "#-----#\n",
- "\n",
- "from transformers import CLIPProcessor, CLIPModel\n",
- "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
- "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
- "#-------#\n",
- "ids_A = processor.tokenizer(text=prompt_A, padding=use_token_padding, return_tensors=\"pt\")\n",
- "text_encoding_A = model.get_text_features(**ids_A)\n",
- "A = text_encoding_A[0]\n",
- "_A = LA.vector_norm(A, ord=2)\n",
- "name_A = prompt_A\n",
- "print(f'a text_encoding was created for the prompt \"{prompt_A}\" ')\n",
- "print('')\n",
- "#----#\n",
- "\n",
- "START = start_search_at_ID\n",
- "RANGE = min(search_range , 49407 - start_search_at_ID)\n",
- "\n",
- "dots = torch.zeros(RANGE)\n",
- "is_BC = torch.zeros(RANGE)\n",
- "for index in range(RANGE):\n",
- " id_C = START + index\n",
- " C = token[id_C]\n",
- " _C = LA.vector_norm(C, ord=2)\n",
- " name_C = vocab[id_C]\n",
- "\n",
- " # Decide if we should process prefix/suffix tokens\n",
- " if name_C.find('</w>')<=-1:\n",
- " if restrictions != \"Prefix only\":\n",
- " continue\n",
- " else:\n",
- " if restrictions == \"Prefix only\":\n",
- " continue\n",
- " #-----#\n",
- "\n",
- " # Decide if char-size is within range\n",
- " if len(name_C) < min_char_size:\n",
- " continue\n",
- " if len(name_C) > min_char_size + char_range:\n",
- " continue\n",
- " #-----#\n",
- "\n",
- " name_CB = must_start_with + name_C + name_B + must_end_with\n",
- " if restrictions == \"Prefix only\":\n",
- " name_CB = must_start_with + name_C + '-' + name_B + must_end_with\n",
- " #-----#\n",
- " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
- " text_encoding_CB = model.get_text_features(**ids_CB)\n",
- " CB = text_encoding_CB[0]\n",
- " _CB = LA.vector_norm(CB, ord=2)\n",
- " sim_CB = torch.dot(A,CB)/(_A*_CB)\n",
- " #-----#\n",
- " if restrictions == \"Prefix only\":\n",
- " result = sim_CB\n",
- " result = result.item()\n",
- " dots[index] = result\n",
- " continue\n",
- " #-----#\n",
- " name_BC = must_start_with + name_B + name_C + must_end_with\n",
- " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
- " text_encoding_BC = model.get_text_features(**ids_BC)\n",
- " BC = text_encoding_BC[0]\n",
- " _BC = LA.vector_norm(BC, ord=2)\n",
- " sim_BC = torch.dot(A,BC)/(_A*_BC)\n",
- " #-----#\n",
- "\n",
- " result = sim_CB\n",
- " if(sim_BC > sim_CB):\n",
- " is_BC[index] = 1\n",
- " result = sim_BC\n",
- "\n",
- " #result = absolute_value(result.item())\n",
- " result = result.item()\n",
- " dots[index] = result\n",
- "#----#\n",
- "\n",
- "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
- "\n",
- "# @markdown Print options\n",
- "list_size = 100 # @param {type:'number'}\n",
- "print_ID = False # @param {type:\"boolean\"}\n",
- "print_Similarity = True # @param {type:\"boolean\"}\n",
- "print_Name = True # @param {type:\"boolean\"}\n",
- "print_Divider = True # @param {type:\"boolean\"}\n",
- "\n",
- "\n",
- "if (print_Divider):\n",
- " print('//---//')\n",
- "\n",
- "print('')\n",
- "print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the text_encoding for the prompt \"{prompt_A}\" : ')\n",
- "print('')\n",
- "\n",
- "for index in range(min(list_size,RANGE)):\n",
- " id = START + indices[index].item()\n",
- " if (print_Name):\n",
- " if(is_BC[index]>0):\n",
- " print(must_start_with + name_B + vocab[id] + must_end_with)\n",
- " else:\n",
- " if restrictions == \"Prefix only\":\n",
- " print(must_start_with + vocab[id] + '-' + name_B + must_end_with)\n",
- " else:\n",
- " print(must_start_with + vocab[id] + name_B + must_end_with)\n",
- " if (print_ID):\n",
- " print(f'ID = {id}') # IDs\n",
- " if (print_Similarity):\n",
- " print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n",
- " if (print_Divider):\n",
- " print('--------')"
- ],
- "metadata": {
- "cellView": "form",
- "id": "uDtcm-l8UCJk"
- },
- "execution_count": null,
- "outputs": []
- },
  {
  "cell_type": "markdown",
  "source": [
 
@@ -116,28 +116,10 @@ (new-file side)
  "metadata": {
  "id": "Ch9puvwKH1s3",
  "collapsed": true,
+ "cellView": "form"
  },
+ "execution_count": null,
+ "outputs": []
  },
  {
  "cell_type": "code",
 
@@ -306,7 +288,16 @@ (new-file side)
  {
  "cell_type": "code",
  "source": [
+ "# @title 🪐🖼️ -> 📝 Slow Recursive Token Image interrogator\n",
+ "\n",
+ "# @markdown # What do you want to mimic?\n",
+ "use = '🖼️image_encoding from image' # @param ['📝text_encoding from prompt', '🖼️image_encoding from image']\n",
+ "# @markdown --------------------------\n",
+ "use_token_padding = True # param {type:\"boolean\"} <---- Enabled by default\n",
+ "prompt = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
+ "\n",
+ "prompt_A = prompt\n",
+ "\n",
  "from google.colab import files\n",
  "def upload_files():\n",
  " from google.colab import files\n",
 
@@ -316,61 +307,80 @@ (new-file side)
  " return list(uploaded.keys())\n",
  "#Get image\n",
  "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
+ "image_url = \"http://images.cocodataset.org/val2017/000000039769.jpg\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload (scroll down to see it)\"}\n",
+ "\n",
+ "\n",
+ "colab_image_path = \"\" # @param {\"type\":\"string\",\"placeholder\": \"eval. as '/content/sd_tokens/' + **your input**\"}\n",
  "\n",
+ "# @markdown --------------------------\n",
  "from PIL import Image\n",
  "import requests\n",
  "image_A = \"\"\n",
  "\n",
  "#----#\n",
  "\n",
+ "if(use == '🖼️image_encoding from image'):\n",
+ " if image_url == \"\":\n",
+ " import cv2\n",
+ " from google.colab.patches import cv2_imshow\n",
+ " # Open the image.\n",
+ " if colab_image_path == \"\":\n",
+ " keys = upload_files()\n",
+ " for key in keys:\n",
+ " image_A = cv2.imread(\"/content/sd_tokens/\" + key)\n",
+ " colab_image_path = \"/content/sd_tokens/\" + key\n",
+ " else:\n",
+ " image_A = cv2.imread(\"/content/sd_tokens/\" + colab_image_path)\n",
+ " else:\n",
+ " image_A = Image.open(requests.get(image_url, stream=True).raw)\n",
+ "#------#\n",
  "\n",
+ "from transformers import AutoTokenizer\n",
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
  "from transformers import CLIPProcessor, CLIPModel\n",
  "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
  "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
+ "\n",
+ "\n",
+ "if(use == '🖼️image_encoding from image'):\n",
+ " # Get image features\n",
+ " inputs = processor(images=image_A, return_tensors=\"pt\")\n",
+ " image_features = model.get_image_features(**inputs)\n",
+ " image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)\n",
+ " name_A = \"the image\"\n",
  "#-----#\n",
  "\n",
+ "\n",
+ "if(use == '📝text_encoding from prompt'):\n",
+ " # Get text features\n",
+ " inputs = tokenizer(text = prompt, padding=True, return_tensors=\"pt\")\n",
+ " text_features_A = model.get_text_features(**inputs)\n",
+ " name_A = prompt\n",
+ "#-----#\n",
+ "\n",
+ "\n",
+ "# @markdown # The output...\n",
  "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
  "must_contain = \"banana \" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
  "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
  "token_B = must_contain\n",
  "\n",
+ "# @markdown -----\n",
+ "\n",
+ "# @markdown # Use a range of tokens from the vocab.json (slow method)\n",
  "start_search_at_ID = 27700 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
+ "search_range = 100 # @param {type:\"slider\", min:100, max: 2000, step:0}\n",
  "restrictions = 'None' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
  "\n",
+ "#markdown Limit char size of included token <----- Disabled\n",
+ "min_char_size = 0 #param {type:\"slider\", min:0, max: 20, step:1}\n",
+ "char_range = 50 #param {type:\"slider\", min:0, max: 20, step:1}\n",
  "\n",
+ "\n",
+ "# markdown # ...or paste prompt items\n",
+ "# markdown Format must be {item1|item2|...}. You can acquire prompt items using the Randomizer in the fusion gen: https://perchance.org/fusion-ai-image-generator\n",
+ "_enable = False # param {\"type\":\"boolean\"}\n",
+ "prompt_items = \"\" # param {\"type\":\"string\",\"placeholder\":\"{item1|item2|...}\"}\n",
  "#-----#\n",
  "name_B = must_contain\n",
  "#-----#\n",
 
@@ -412,17 +422,29 @@ (new-file side)
  " if len(name_C) > min_char_size + char_range:\n",
  " continue\n",
  " #-----#\n",
  " name_CB = must_start_with + name_C + name_B + must_end_with\n",
  " if is_Prefix>0:\n",
  " name_CB = must_start_with + ' ' + name_C.strip() + '-' + name_B.strip() + ' ' + must_end_with\n",
  " #-----#\n",
+ "\n",
+ " if(use == '🖼️image_encoding from image'):\n",
+ " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
+ " text_features = model.get_text_features(**ids_CB)\n",
+ " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
+ " logit_scale = model.logit_scale.exp()\n",
+ " torch.matmul(text_features, image_features.t()) * logit_scale\n",
+ " sim_CB = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+ " #-----#\n",
+ "\n",
+ " if(use == '📝text_encoding from prompt'):\n",
+ " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
+ " text_features = model.get_text_features(**ids_CB)\n",
+ " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
+ " sim_CB = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
+ " #-----#\n",
+ "\n",
+ "\n",
+ "\n",
  " #-----#\n",
  " if restrictions == \"Prefix only\":\n",
  " result = sim_CB\n",
 
@@ -430,13 +452,23 @@ (new-file side)
  " dots[index] = result\n",
  " continue\n",
  " #-----#\n",
+ "\n",
+ " if(use == '🖼️image_encoding from image'):\n",
+ " name_BC = must_start_with + name_B + name_C + must_end_with\n",
+ " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
+ " text_features = model.get_text_features(**ids_BC)\n",
+ " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
+ " logit_scale = model.logit_scale.exp()\n",
+ " torch.matmul(text_features, image_features.t()) * logit_scale\n",
+ " sim_BC = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+ " #-----#\n",
+ "\n",
+ " if(use == '📝text_encoding from prompt'):\n",
+ " name_BC = must_start_with + name_B + name_C + must_end_with\n",
+ " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
+ " text_features = model.get_text_features(**ids_BC)\n",
+ " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
+ " sim_BC = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
  " #-----#\n",
  "\n",
  " result = sim_CB\n",
483
  "\n",
484
  "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
485
  "\n",
486
+ "\n",
487
+ "# @markdown ----------\n",
488
+ "# @markdown # Print options\n",
489
  "list_size = 100 # @param {type:'number'}\n",
490
  "print_ID = False # @param {type:\"boolean\"}\n",
491
  "print_Similarity = True # @param {type:\"boolean\"}\n",
 
@@ -531,13 +565,25 @@ (new-file side)
  " #----#\n",
  " ids = processor.tokenizer(text=name, padding=use_token_padding, return_tensors=\"pt\")\n",
  "\n",
+ " if(use == '🖼️image_encoding from image'):\n",
+ " text_features = model.get_text_features(**ids)\n",
+ " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
+ " logit_scale = model.logit_scale.exp()\n",
+ " torch.matmul(text_features, image_features.t()) * logit_scale\n",
+ " sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+ " #-----#\n",
+ "\n",
+ " if(use == '📝text_encoding from prompt'):\n",
+ " text_features = model.get_text_features(**ids)\n",
+ " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
+ " sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
+ " #-----#\n",
+ "\n",
+ "\n",
  " dots[index] = sim\n",
  " names[index] = name\n",
+ "\n",
+ "\n",
  "#------#\n",
  "\n",
  "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
 
@@ -604,153 +650,6 @@ (new-file side)
  "id": "hyK423TQCRup"
  }
  },
  {
  "cell_type": "markdown",
  "source": [