codeShare committed on
Commit
70d7fbb
1 Parent(s): 540a0c2

Upload sd_token_similarity_calculator.ipynb

Browse files
Files changed (1) hide show
  1. sd_token_similarity_calculator.ipynb +500 -28
sd_token_similarity_calculator.ipynb CHANGED
@@ -28,7 +28,7 @@
28
  {
29
  "cell_type": "code",
30
  "source": [
31
- "# @title Load/initialize values\n",
32
  "# Load the tokens into the colab\n",
33
  "!git clone https://huggingface.co/datasets/codeShare/sd_tokens\n",
34
  "import torch\n",
@@ -116,23 +116,10 @@
116
  "metadata": {
117
  "id": "Ch9puvwKH1s3",
118
  "collapsed": true,
119
- "cellView": "form",
120
- "outputId": "9a9d4274-a633-464b-e1fb-06a33f3dd873",
121
- "colab": {
122
- "base_uri": "https://localhost:8080/"
123
- }
124
  },
125
- "execution_count": 59,
126
- "outputs": [
127
- {
128
- "output_type": "stream",
129
- "name": "stdout",
130
- "text": [
131
- "fatal: destination path 'sd_tokens' already exists and is not an empty directory.\n",
132
- "/content/sd_tokens\n"
133
- ]
134
- }
135
- ]
136
  },
137
  {
138
  "cell_type": "code",
@@ -278,7 +265,8 @@
278
  "#Print the sorted list from above result"
279
  ],
280
  "metadata": {
281
- "id": "iWeFnT1gAx6A"
 
282
  },
283
  "execution_count": null,
284
  "outputs": []
@@ -315,7 +303,8 @@
315
  ],
316
  "metadata": {
317
  "id": "QQOjh5BvnG8M",
318
- "collapsed": true
 
319
  },
320
  "execution_count": null,
321
  "outputs": []
@@ -323,14 +312,497 @@
323
  {
324
  "cell_type": "code",
325
  "source": [
326
- "# @title 🪐 Find similiar prompt\n",
327
- "# @markdown Prompt A to match against\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  "prompt_A = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
 
329
  "# @markdown Set conditions for the output\n",
330
- "must_start_with = \"bendy \" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
331
  "must_contain = \"yellow\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
332
- "must_end_with = \" on a table\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
333
- "\n",
334
  "token_B = must_contain\n",
335
  "\n",
336
  "# @markdown Limit the search\n",
@@ -343,7 +815,6 @@
343
  "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
344
  "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
345
  "\n",
346
- "\n",
347
  "#Tokenize input B\n",
348
  "from transformers import AutoTokenizer\n",
349
  "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
@@ -427,8 +898,6 @@
427
  " dots[index] = result\n",
428
  "#----#\n",
429
  "\n",
430
- "\n",
431
- "\n",
432
  "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
433
  "\n",
434
  "# @markdown Print options\n",
@@ -464,6 +933,7 @@
464
  " print('--------')"
465
  ],
466
  "metadata": {
 
467
  "id": "uDtcm-l8UCJk"
468
  },
469
  "execution_count": null,
@@ -901,7 +1371,9 @@
901
  "\n",
902
  "There might be some updates in the future with features not mentioned here.\n",
903
  "\n",
904
- "//---//"
 
 
905
  ],
906
  "metadata": {
907
  "id": "njeJx_nSSA8H"
 
28
  {
29
  "cell_type": "code",
30
  "source": [
31
+ "# @title ✳️ Load/initialize values\n",
32
  "# Load the tokens into the colab\n",
33
  "!git clone https://huggingface.co/datasets/codeShare/sd_tokens\n",
34
  "import torch\n",
 
116
  "metadata": {
117
  "id": "Ch9puvwKH1s3",
118
  "collapsed": true,
119
+ "cellView": "form"
 
 
 
 
120
  },
121
+ "execution_count": null,
122
+ "outputs": []
 
 
 
 
 
 
 
 
 
123
  },
124
  {
125
  "cell_type": "code",
 
265
  "#Print the sorted list from above result"
266
  ],
267
  "metadata": {
268
+ "id": "iWeFnT1gAx6A",
269
+ "cellView": "form"
270
  },
271
  "execution_count": null,
272
  "outputs": []
 
303
  ],
304
  "metadata": {
305
  "id": "QQOjh5BvnG8M",
306
+ "collapsed": true,
307
+ "cellView": "form"
308
  },
309
  "execution_count": null,
310
  "outputs": []
 
312
  {
313
  "cell_type": "code",
314
  "source": [
315
+ "# @title 🪐🖼️ -> 📝 Image to prompt : Add single token to existing prompt to match image\n",
316
+ "from google.colab import files\n",
317
+ "def getLocalFiles():\n",
318
+ " _files = files.upload()\n",
319
+ " if len(_files) >0:\n",
320
+ " for k,v in _files.items():\n",
321
+ " open(k,'wb').write(v)\n",
322
+ "\n",
323
+ "#Get image\n",
324
+ "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
325
+ "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload\"}\n",
326
+ "from PIL import Image\n",
327
+ "import requests\n",
328
+ "if url == \"\":\n",
329
+ " image_A = getLocalFiles()\n",
330
+ "else:\n",
331
+ " image_A = Image.open(requests.get(url, stream=True).raw)\n",
332
+ "\n",
333
+ "\n",
334
+ "# Get image features\n",
335
+ "from transformers import CLIPProcessor, CLIPModel\n",
336
+ "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
337
+ "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
338
+ "inputs = processor(images=image_A, return_tensors=\"pt\")\n",
339
+ "image_features = model.get_image_features(**inputs)\n",
340
+ "text_encoding_A = image_features\n",
341
+ "A = text_encoding_A[0]\n",
342
+ "_A = LA.vector_norm(A, ord=2)\n",
343
+ "prompt_A = \"the image\"\n",
344
+ "name_A = prompt_A\n",
345
+ "#-----#\n",
346
+ "\n",
347
+ "# @markdown Set conditions for the output\n",
348
+ "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
349
+ "must_contain = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
350
+ "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
351
+ "token_B = must_contain\n",
352
+ "\n",
353
+ "# @markdown Limit the search\n",
354
+ "use_token_padding = True # @param {type:\"boolean\"}\n",
355
+ "start_search_at_ID = 12500 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
356
+ "search_range = 500 # @param {type:\"slider\", min:0, max: 2000, step:100}\n",
357
+ "restrictions = 'Suffix only' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
358
+ "\n",
359
+ "# @markdown Limit char size of included token\n",
360
+ "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
361
+ "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
362
+ "\n",
363
+ "#Tokenize input B\n",
364
+ "from transformers import AutoTokenizer\n",
365
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
366
+ "tokenizer_output = tokenizer(text = token_B)\n",
367
+ "input_ids = tokenizer_output['input_ids']\n",
368
+ "#-----#\n",
369
+ "name_B = must_contain\n",
370
+ "#-----#\n",
371
+ "\n",
372
+ "START = start_search_at_ID\n",
373
+ "RANGE = min(search_range , 49407 - start_search_at_ID)\n",
374
+ "\n",
375
+ "dots = torch.zeros(RANGE)\n",
376
+ "is_BC = torch.zeros(RANGE)\n",
377
+ "for index in range(RANGE):\n",
378
+ " id_C = START + index\n",
379
+ " C = token[id_C]\n",
380
+ " _C = LA.vector_norm(C, ord=2)\n",
381
+ " name_C = vocab[id_C]\n",
382
+ "\n",
383
+ " # Decide if we should process prefix/suffix tokens\n",
384
+ " if name_C.find('</w>')<=-1:\n",
385
+ " if restrictions != \"Prefix only\":\n",
386
+ " continue\n",
387
+ " else:\n",
388
+ " if restrictions == \"Prefix only\":\n",
389
+ " continue\n",
390
+ " #-----#\n",
391
+ "\n",
392
+ " # Decide if char-size is within range\n",
393
+ " if len(name_C) < min_char_size:\n",
394
+ " continue\n",
395
+ " if len(name_C) > min_char_size + char_range:\n",
396
+ " continue\n",
397
+ " #-----#\n",
398
+ "\n",
399
+ " name_CB = must_start_with + name_C + name_B + must_end_with\n",
400
+ " if restrictions == \"Prefix only\":\n",
401
+ " name_CB = must_start_with + name_C + '-' + name_B + must_end_with\n",
402
+ " #-----#\n",
403
+ " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
404
+ " text_encoding_CB = model.get_text_features(**ids_CB)\n",
405
+ " CB = text_encoding_CB[0]\n",
406
+ " _CB = LA.vector_norm(CB, ord=2)\n",
407
+ " sim_CB = torch.dot(A,CB)/(_A*_CB)\n",
408
+ " #-----#\n",
409
+ " if restrictions == \"Prefix only\":\n",
410
+ " result = sim_CB\n",
411
+ " result = result.item()\n",
412
+ " dots[index] = result\n",
413
+ " continue\n",
414
+ " #-----#\n",
415
+ " name_BC = must_start_with + name_B + name_C + must_end_with\n",
416
+ " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
417
+ " text_encoding_BC = model.get_text_features(**ids_BC)\n",
418
+ " BC = text_encoding_BC[0]\n",
419
+ " _BC = LA.vector_norm(BC, ord=2)\n",
420
+ " sim_BC = torch.dot(A,BC)/(_A*_BC)\n",
421
+ " #-----#\n",
422
+ "\n",
423
+ " result = sim_CB\n",
424
+ " if(sim_BC > sim_CB):\n",
425
+ " is_BC[index] = 1\n",
426
+ " result = sim_BC\n",
427
+ "\n",
428
+ " #result = absolute_value(result.item())\n",
429
+ " result = result.item()\n",
430
+ " dots[index] = result\n",
431
+ "#----#\n",
432
+ "\n",
433
+ "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
434
+ "\n",
435
+ "# @markdown Print options\n",
436
+ "list_size = 100 # @param {type:'number'}\n",
437
+ "print_ID = False # @param {type:\"boolean\"}\n",
438
+ "print_Similarity = True # @param {type:\"boolean\"}\n",
439
+ "print_Name = True # @param {type:\"boolean\"}\n",
440
+ "print_Divider = True # @param {type:\"boolean\"}\n",
441
+ "\n",
442
+ "\n",
443
+ "if (print_Divider):\n",
444
+ " print('//---//')\n",
445
+ "\n",
446
+ "print('')\n",
447
+ "print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the text_encoding for {prompt_A} : ')\n",
448
+ "print('')\n",
449
+ "\n",
450
+ "for index in range(min(list_size,RANGE)):\n",
451
+ " id = START + indices[index].item()\n",
452
+ " if (print_Name):\n",
453
+ " if(is_BC[index]>0):\n",
454
+ " print(must_start_with + name_B + vocab[id] + must_end_with)\n",
455
+ " else:\n",
456
+ " if restrictions == \"Prefix only\":\n",
457
+ " print(must_start_with + vocab[id] + '-' + name_B + must_end_with)\n",
458
+ " else:\n",
459
+ " print(must_start_with + vocab[id] + name_B + must_end_with)\n",
460
+ " if (print_ID):\n",
461
+ " print(f'ID = {id}') # IDs\n",
462
+ " if (print_Similarity):\n",
463
+ " print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n",
464
+ " if (print_Divider):\n",
465
+ " print('--------')\n",
466
+ "\n",
467
+ "\n",
468
+ "\n",
469
+ "\n",
470
+ "\n"
471
+ ],
472
+ "metadata": {
473
+ "collapsed": true,
474
+ "cellView": "form",
475
+ "id": "fi0jRruI0-tu",
476
+ "outputId": "6d7e8c39-a117-4b35-acfe-2a128c65aeb7",
477
+ "colab": {
478
+ "base_uri": "https://localhost:8080/"
479
+ }
480
+ },
481
+ "execution_count": 9,
482
+ "outputs": [
483
+ {
484
+ "output_type": "stream",
485
+ "name": "stdout",
486
+ "text": [
487
+ "//---//\n",
488
+ "\n",
489
+ "These token pairings within the range ID = 12500 to ID = 13000 most closely match the text_encoding for the prompt \"the image\" : \n",
490
+ "\n",
491
+ "sits</w>yellow\n",
492
+ "similiarity = 23.02 %\n",
493
+ "--------\n",
494
+ "neys</w>yellow\n",
495
+ "similiarity = 19.74 %\n",
496
+ "--------\n",
497
+ "cody</w>yellow\n",
498
+ "similiarity = 18.61 %\n",
499
+ "--------\n",
500
+ "wns</w>yellow\n",
501
+ "similiarity = 18.43 %\n",
502
+ "--------\n",
503
+ "java</w>yellow\n",
504
+ "similiarity = 18.15 %\n",
505
+ "--------\n",
506
+ "jj</w>yellow\n",
507
+ "similiarity = 18.03 %\n",
508
+ "--------\n",
509
+ "eno</w>yellow\n",
510
+ "similiarity = 17.87 %\n",
511
+ "--------\n",
512
+ "cled</w>yellow\n",
513
+ "similiarity = 17.85 %\n",
514
+ "--------\n",
515
+ "nom</w>yellow\n",
516
+ "similiarity = 17.75 %\n",
517
+ "--------\n",
518
+ "dads</w>yellow\n",
519
+ "similiarity = 17.5 %\n",
520
+ "--------\n",
521
+ "mil</w>yellow\n",
522
+ "similiarity = 17.47 %\n",
523
+ "--------\n",
524
+ "whom</w>yellow\n",
525
+ "similiarity = 17.37 %\n",
526
+ "--------\n",
527
+ "itv</w>yellow\n",
528
+ "similiarity = 17.34 %\n",
529
+ "--------\n",
530
+ "vibe</w>yellow\n",
531
+ "similiarity = 17.2 %\n",
532
+ "--------\n",
533
+ "noir</w>yellow\n",
534
+ "similiarity = 17.14 %\n",
535
+ "--------\n",
536
+ "yellowarel</w>\n",
537
+ "similiarity = 17.1 %\n",
538
+ "--------\n",
539
+ "#âĢ¦</w>yellow\n",
540
+ "similiarity = 17.04 %\n",
541
+ "--------\n",
542
+ "maya</w>yellow\n",
543
+ "similiarity = 17.03 %\n",
544
+ "--------\n",
545
+ "yellowbam</w>\n",
546
+ "similiarity = 17.01 %\n",
547
+ "--------\n",
548
+ "erts</w>yellow\n",
549
+ "similiarity = 17.01 %\n",
550
+ "--------\n",
551
+ "xc</w>yellow\n",
552
+ "similiarity = 16.98 %\n",
553
+ "--------\n",
554
+ "mob</w>yellow\n",
555
+ "similiarity = 16.89 %\n",
556
+ "--------\n",
557
+ "dees</w>yellow\n",
558
+ "similiarity = 16.87 %\n",
559
+ "--------\n",
560
+ "icc</w>yellow\n",
561
+ "similiarity = 16.75 %\n",
562
+ "--------\n",
563
+ "aly</w>yellow\n",
564
+ "similiarity = 16.63 %\n",
565
+ "--------\n",
566
+ "lis</w>yellow\n",
567
+ "similiarity = 16.63 %\n",
568
+ "--------\n",
569
+ "yellowturf</w>\n",
570
+ "similiarity = 16.62 %\n",
571
+ "--------\n",
572
+ "yellowbaba</w>\n",
573
+ "similiarity = 16.58 %\n",
574
+ "--------\n",
575
+ ":*</w>yellow\n",
576
+ "similiarity = 16.42 %\n",
577
+ "--------\n",
578
+ "inho</w>yellow\n",
579
+ "similiarity = 16.39 %\n",
580
+ "--------\n",
581
+ "yellowhes</w>\n",
582
+ "similiarity = 16.37 %\n",
583
+ "--------\n",
584
+ "nity</w>yellow\n",
585
+ "similiarity = 16.3 %\n",
586
+ "--------\n",
587
+ "lust</w>yellow\n",
588
+ "similiarity = 16.3 %\n",
589
+ "--------\n",
590
+ "ikh</w>yellow\n",
591
+ "similiarity = 16.26 %\n",
592
+ "--------\n",
593
+ "nyt</w>yellow\n",
594
+ "similiarity = 16.24 %\n",
595
+ "--------\n",
596
+ "(+</w>yellow\n",
597
+ "similiarity = 16.11 %\n",
598
+ "--------\n",
599
+ "foto</w>yellow\n",
600
+ "similiarity = 16.11 %\n",
601
+ "--------\n",
602
+ "stl</w>yellow\n",
603
+ "similiarity = 16.06 %\n",
604
+ "--------\n",
605
+ "mick</w>yellow\n",
606
+ "similiarity = 16.06 %\n",
607
+ "--------\n",
608
+ "...@</w>yellow\n",
609
+ "similiarity = 16.05 %\n",
610
+ "--------\n",
611
+ "ugh</w>yellow\n",
612
+ "similiarity = 16.05 %\n",
613
+ "--------\n",
614
+ "gro</w>yellow\n",
615
+ "similiarity = 16.01 %\n",
616
+ "--------\n",
617
+ "wski</w>yellow\n",
618
+ "similiarity = 16.01 %\n",
619
+ "--------\n",
620
+ "ðŁĴ«</w>yellow\n",
621
+ "similiarity = 15.74 %\n",
622
+ "--------\n",
623
+ "deen</w>yellow\n",
624
+ "similiarity = 15.73 %\n",
625
+ "--------\n",
626
+ "assy</w>yellow\n",
627
+ "similiarity = 15.72 %\n",
628
+ "--------\n",
629
+ "mtv</w>yellow\n",
630
+ "similiarity = 15.72 %\n",
631
+ "--------\n",
632
+ "yellowðŁĺ»</w>\n",
633
+ "similiarity = 15.72 %\n",
634
+ "--------\n",
635
+ "yellowfrm</w>\n",
636
+ "similiarity = 15.65 %\n",
637
+ "--------\n",
638
+ "moss</w>yellow\n",
639
+ "similiarity = 15.64 %\n",
640
+ "--------\n",
641
+ "bart</w>yellow\n",
642
+ "similiarity = 15.61 %\n",
643
+ "--------\n",
644
+ "tw</w>yellow\n",
645
+ "similiarity = 15.51 %\n",
646
+ "--------\n",
647
+ "yellowplug</w>\n",
648
+ "similiarity = 15.46 %\n",
649
+ "--------\n",
650
+ "jen</w>yellow\n",
651
+ "similiarity = 15.45 %\n",
652
+ "--------\n",
653
+ "pst</w>yellow\n",
654
+ "similiarity = 15.43 %\n",
655
+ "--------\n",
656
+ "omfg</w>yellow\n",
657
+ "similiarity = 15.43 %\n",
658
+ "--------\n",
659
+ "dine</w>yellow\n",
660
+ "similiarity = 15.38 %\n",
661
+ "--------\n",
662
+ "vern</w>yellow\n",
663
+ "similiarity = 15.33 %\n",
664
+ "--------\n",
665
+ "reno</w>yellow\n",
666
+ "similiarity = 15.25 %\n",
667
+ "--------\n",
668
+ "yellow´</w>\n",
669
+ "similiarity = 15.14 %\n",
670
+ "--------\n",
671
+ "omic</w>yellow\n",
672
+ "similiarity = 15.14 %\n",
673
+ "--------\n",
674
+ "łï¸ı</w>yellow\n",
675
+ "similiarity = 15.11 %\n",
676
+ "--------\n",
677
+ "yellowgis</w>\n",
678
+ "similiarity = 15.06 %\n",
679
+ "--------\n",
680
+ "aunt</w>yellow\n",
681
+ "similiarity = 15.0 %\n",
682
+ "--------\n",
683
+ "joan</w>yellow\n",
684
+ "similiarity = 14.96 %\n",
685
+ "--------\n",
686
+ "anas</w>yellow\n",
687
+ "similiarity = 14.92 %\n",
688
+ "--------\n",
689
+ "ðŁĴĵ</w>yellow\n",
690
+ "similiarity = 14.9 %\n",
691
+ "--------\n",
692
+ "chad</w>yellow\n",
693
+ "similiarity = 14.89 %\n",
694
+ "--------\n",
695
+ "yellowsake</w>\n",
696
+ "similiarity = 14.88 %\n",
697
+ "--------\n",
698
+ "gues</w>yellow\n",
699
+ "similiarity = 14.84 %\n",
700
+ "--------\n",
701
+ "gian</w>yellow\n",
702
+ "similiarity = 14.84 %\n",
703
+ "--------\n",
704
+ "asi</w>yellow\n",
705
+ "similiarity = 14.83 %\n",
706
+ "--------\n",
707
+ "yellowoven</w>\n",
708
+ "similiarity = 14.82 %\n",
709
+ "--------\n",
710
+ "jury</w>yellow\n",
711
+ "similiarity = 14.79 %\n",
712
+ "--------\n",
713
+ "blvd</w>yellow\n",
714
+ "similiarity = 14.75 %\n",
715
+ "--------\n",
716
+ "omez</w>yellow\n",
717
+ "similiarity = 14.72 %\n",
718
+ "--------\n",
719
+ "yellowyang</w>\n",
720
+ "similiarity = 14.7 %\n",
721
+ "--------\n",
722
+ "gu</w>yellow\n",
723
+ "similiarity = 14.48 %\n",
724
+ "--------\n",
725
+ "yellowova</w>\n",
726
+ "similiarity = 14.45 %\n",
727
+ "--------\n",
728
+ "yellowinez</w>\n",
729
+ "similiarity = 14.44 %\n",
730
+ "--------\n",
731
+ "pei</w>yellow\n",
732
+ "similiarity = 14.44 %\n",
733
+ "--------\n",
734
+ "ãĢIJ</w>yellow\n",
735
+ "similiarity = 14.43 %\n",
736
+ "--------\n",
737
+ "ãĢij</w>yellow\n",
738
+ "similiarity = 14.43 %\n",
739
+ "--------\n",
740
+ "ðŁĮŀ</w>yellow\n",
741
+ "similiarity = 14.36 %\n",
742
+ "--------\n",
743
+ "ðŁĺĿ</w>yellow\n",
744
+ "similiarity = 14.27 %\n",
745
+ "--------\n",
746
+ "troy</w>yellow\n",
747
+ "similiarity = 14.16 %\n",
748
+ "--------\n",
749
+ "pale</w>yellow\n",
750
+ "similiarity = 14.14 %\n",
751
+ "--------\n",
752
+ "boi</w>yellow\n",
753
+ "similiarity = 14.11 %\n",
754
+ "--------\n",
755
+ "nn</w>yellow\n",
756
+ "similiarity = 14.08 %\n",
757
+ "--------\n",
758
+ "âı°</w>yellow\n",
759
+ "similiarity = 14.01 %\n",
760
+ "--------\n",
761
+ "ooth</w>yellow\n",
762
+ "similiarity = 13.93 %\n",
763
+ "--------\n",
764
+ "pied</w>yellow\n",
765
+ "similiarity = 13.9 %\n",
766
+ "--------\n",
767
+ "bola</w>yellow\n",
768
+ "similiarity = 13.79 %\n",
769
+ "--------\n",
770
+ "âŀ¡</w>yellow\n",
771
+ "similiarity = 13.77 %\n",
772
+ "--------\n",
773
+ "rena</w>yellow\n",
774
+ "similiarity = 13.75 %\n",
775
+ "--------\n",
776
+ "dley</w>yellow\n",
777
+ "similiarity = 13.73 %\n",
778
+ "--------\n",
779
+ "evan</w>yellow\n",
780
+ "similiarity = 13.67 %\n",
781
+ "--------\n",
782
+ "pony</w>yellow\n",
783
+ "similiarity = 13.63 %\n",
784
+ "--------\n",
785
+ "rene</w>yellow\n",
786
+ "similiarity = 13.62 %\n",
787
+ "--------\n",
788
+ "mock</w>yellow\n",
789
+ "similiarity = 13.57 %\n",
790
+ "--------\n"
791
+ ]
792
+ }
793
+ ]
794
+ },
795
+ {
796
+ "cell_type": "code",
797
+ "source": [
798
+ "# @title 🪐📝 Prompt to prompt : Add single token to existing prompt to match another prompt\n",
799
+ "# @markdown Write a text to match against...\n",
800
  "prompt_A = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
801
+ "\n",
802
  "# @markdown Set conditions for the output\n",
803
+ "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
804
  "must_contain = \"yellow\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
805
+ "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
 
806
  "token_B = must_contain\n",
807
  "\n",
808
  "# @markdown Limit the search\n",
 
815
  "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
816
  "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
817
  "\n",
 
818
  "#Tokenize input B\n",
819
  "from transformers import AutoTokenizer\n",
820
  "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
 
898
  " dots[index] = result\n",
899
  "#----#\n",
900
  "\n",
 
 
901
  "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
902
  "\n",
903
  "# @markdown Print options\n",
 
933
  " print('--------')"
934
  ],
935
  "metadata": {
936
+ "cellView": "form",
937
  "id": "uDtcm-l8UCJk"
938
  },
939
  "execution_count": null,
 
1371
  "\n",
1372
  "There might be some updates in the future with features not mentioned here.\n",
1373
  "\n",
1374
+ "//---//\n",
1375
+ "\n",
1376
+ "https://codeandlife.com/2023/01/26/mastering-the-huggingface-clip-model-how-to-extract-embeddings-and-calculate-similarity-for-text-and-images/"
1377
  ],
1378
  "metadata": {
1379
  "id": "njeJx_nSSA8H"