codeShare committed on
Commit
7d36f97
•
1 Parent(s): 8d4041f

Upload sd_token_similarity_calculator.ipynb

Browse files
Files changed (1) hide show
  1. sd_token_similarity_calculator.ipynb +153 -51
sd_token_similarity_calculator.ipynb CHANGED
@@ -122,41 +122,30 @@
122
  "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n"
123
  ],
124
  "metadata": {
125
- "id": "rUXQ73IbonHY",
126
- "outputId": "5ce85c43-136a-49b5-f316-a74e70085e88",
127
- "colab": {
128
- "base_uri": "https://localhost:8080/"
129
- }
130
  },
131
- "execution_count": 1,
132
- "outputs": [
133
- {
134
- "output_type": "stream",
135
- "name": "stdout",
136
- "text": [
137
- "/content\n",
138
- "Cloning into 'text-to-image-prompts'...\n",
139
- "remote: Enumerating objects: 521, done.\u001b[K\n",
140
- "remote: Counting objects: 100% (518/518), done.\u001b[K\n",
141
- "remote: Compressing objects: 100% (492/492), done.\u001b[K\n",
142
- "remote: Total 521 (delta 91), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
143
- "Receiving objects: 100% (521/521), 2.51 MiB | 6.14 MiB/s, done.\n",
144
- "Resolving deltas: 100% (91/91), done.\n",
145
- "Filtering content: 100% (95/95), 305.98 MiB | 37.71 MiB/s, done.\n"
146
- ]
147
- }
148
- ]
149
  },
150
  {
151
  "cell_type": "code",
152
  "source": [
153
  "# @title ✳️ Select items for the vocab\n",
154
  "\n",
155
- "prompt_features = True # @param {\"type\":\"boolean\",\"placeholder\":\"🦜\"}\n",
156
- "civitai_blue_set = True # @param {\"type\":\"boolean\",\"placeholder\":\"📘\"}\n",
157
- "suffix = True # @param {\"type\":\"boolean\",\"placeholder\":\"🔹\"}\n",
158
  "prefix = False # @param {\"type\":\"boolean\",\"placeholder\":\"🔸\"}\n",
159
  "emojis = True # @param {\"type\":\"boolean\",\"placeholder\":\"😃\"}\n",
 
 
 
 
 
 
 
 
 
160
  "debug = False\n",
161
  "\n",
162
  "#------#\n",
@@ -165,30 +154,38 @@
165
  "nA = 0\n",
166
  "#--------#\n",
167
  "\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  "if prompt_features:\n",
169
  " url = '/content/text-to-image-prompts/civitai-prompts/green'\n",
170
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
171
- " if debug:\n",
172
- " print(prompts[f'{nA}'])\n",
173
- " print(text_encodings[f'{nA}'])\n",
174
  "#--------#\n",
175
  "\n",
176
  "\n",
177
  "if emojis:\n",
178
  " url = '/content/text-to-image-prompts/tokens/emoji'\n",
179
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
180
- " if debug:\n",
181
- " print(prompts[f'{nA}'])\n",
182
- " print(text_encodings[f'{nA}'])\n",
183
  "#--------#\n",
184
  "\n",
185
  "\n",
186
  "if civitai_blue_set:\n",
187
  " url = '/content/text-to-image-prompts/civitai-prompts/blue'\n",
188
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
189
- " if debug:\n",
190
- " print(prompts[f'{nA}'])\n",
191
- " print(text_encodings[f'{nA}'])\n",
192
  "#--------#\n",
193
  "\n",
194
  "if suffix :\n",
@@ -196,9 +193,6 @@
196
  " for item in ['common','average','rare','weird','exotic'] :\n",
197
  " url = tmp + item\n",
198
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
199
- " if debug:\n",
200
- " print(prompts[f'{nA}'])\n",
201
- " print(text_encodings[f'{nA}'])\n",
202
  "#------#\n",
203
  "\n",
204
  "if prefix :\n",
@@ -206,9 +200,6 @@
206
  " for item in ['common','average','rare','weird','exotic'] :\n",
207
  " url = tmp + item\n",
208
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '-')\n",
209
- " if debug:\n",
210
- " print(prompts[f'{nA}'])\n",
211
- " print(text_encodings[f'{nA}'])\n",
212
  "#------#\n",
213
  "\n",
214
  "if debug:\n",
@@ -232,7 +223,7 @@
232
  "cell_type": "code",
233
  "source": [
234
  "# @title ๐Ÿ“ Get Prompt text_encoding similarity to the pre-calc. text_encodings\n",
235
- "prompt = \" a fast car on the road \" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
236
  "\n",
237
  "from transformers import AutoTokenizer\n",
238
  "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
@@ -259,7 +250,7 @@
259
  "metadata": {
260
  "id": "xc-PbIYF428y"
261
  },
262
- "execution_count": null,
263
  "outputs": []
264
  },
265
  {
@@ -368,13 +359,13 @@
368
  ],
369
  "metadata": {
370
  "id": "ke6mZ1RZDOeB",
371
- "outputId": "d8ef4589-8393-4001-ff35-c0c30646a576",
372
  "colab": {
373
  "base_uri": "https://localhost:8080/",
374
  "height": 1000
375
  }
376
  },
377
- "execution_count": null,
378
  "outputs": [
379
  {
380
  "output_type": "display_data",
@@ -418,7 +409,7 @@
418
  "metadata": {
419
  "id": "rebogpoyOG8k"
420
  },
421
- "execution_count": null,
422
  "outputs": []
423
  },
424
  {
@@ -426,7 +417,7 @@
426
  "source": [
427
  "# @title 🖼️ Print the results\n",
428
  "list_size = 100 # @param {type:'number'}\n",
429
- "start_at_index = 100 # @param {type:'number'}\n",
430
  "print_Similarity = True # @param {type:\"boolean\"}\n",
431
  "print_Prompts = True # @param {type:\"boolean\"}\n",
432
  "print_Prefix = True # @param {type:\"boolean\"}\n",
@@ -466,10 +457,121 @@
466
  "#-------#"
467
  ],
468
  "metadata": {
469
- "id": "JkzncP8SgKtS"
 
 
 
 
470
  },
471
- "execution_count": null,
472
- "outputs": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  },
474
  {
475
  "cell_type": "code",
@@ -1346,7 +1448,7 @@
1346
  "base_uri": "https://localhost:8080/"
1347
  }
1348
  },
1349
- "execution_count": 5,
1350
  "outputs": [
1351
  {
1352
  "output_type": "stream",
 
122
  "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n"
123
  ],
124
  "metadata": {
125
+ "id": "rUXQ73IbonHY"
 
 
 
 
126
  },
127
+ "execution_count": null,
128
+ "outputs": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  },
130
  {
131
  "cell_type": "code",
132
  "source": [
133
  "# @title ✳️ Select items for the vocab\n",
134
  "\n",
135
+ "prompt_features = False # @param {\"type\":\"boolean\",\"placeholder\":\"🦜\"}\n",
136
+ "civitai_blue_set = False # @param {\"type\":\"boolean\",\"placeholder\":\"📘\"}\n",
137
+ "suffix = False # @param {\"type\":\"boolean\",\"placeholder\":\"🔹\"}\n",
138
  "prefix = False # @param {\"type\":\"boolean\",\"placeholder\":\"🔸\"}\n",
139
  "emojis = True # @param {\"type\":\"boolean\",\"placeholder\":\"😃\"}\n",
140
+ "#------#\n",
141
+ "\n",
142
+ "#These are borked\n",
143
+ "celebs = False # param {\"type\":\"boolean\",\"placeholder\":\"🆔👨\"}\n",
144
+ "celebs_young = False # param {\"type\":\"boolean\",\"placeholder\":\"🔸\"}\n",
145
+ "#-------#\n",
146
+ "\n",
147
+ "#-----#\n",
148
+ "female_fullnames = True # @param {\"type\":\"boolean\",\"placeholder\":\"😃\"}\n",
149
  "debug = False\n",
150
  "\n",
151
  "#------#\n",
 
154
  "nA = 0\n",
155
  "#--------#\n",
156
  "\n",
157
+ "\n",
158
+ "if celebs:\n",
159
+ " url = '/content/text-to-image-prompts/names/celebs/mixed'\n",
160
+ " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
161
+ "#--------#\n",
162
+ "\n",
163
+ "if celebs_young :\n",
164
+ " url = '/content/text-to-image-prompts/names/celebs/young'\n",
165
+ " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
166
+ "#--------#\n",
167
+ "\n",
168
+ "if female_fullnames:\n",
169
+ " url = '/content/text-to-image-prompts/names/fullnames'\n",
170
+ " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
171
+ "#--------#\n",
172
+ "\n",
173
+ "\n",
174
  "if prompt_features:\n",
175
  " url = '/content/text-to-image-prompts/civitai-prompts/green'\n",
176
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
 
 
 
177
  "#--------#\n",
178
  "\n",
179
  "\n",
180
  "if emojis:\n",
181
  " url = '/content/text-to-image-prompts/tokens/emoji'\n",
182
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
 
 
 
183
  "#--------#\n",
184
  "\n",
185
  "\n",
186
  "if civitai_blue_set:\n",
187
  " url = '/content/text-to-image-prompts/civitai-prompts/blue'\n",
188
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
 
 
 
189
  "#--------#\n",
190
  "\n",
191
  "if suffix :\n",
 
193
  " for item in ['common','average','rare','weird','exotic'] :\n",
194
  " url = tmp + item\n",
195
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
 
 
 
196
  "#------#\n",
197
  "\n",
198
  "if prefix :\n",
 
200
  " for item in ['common','average','rare','weird','exotic'] :\n",
201
  " url = tmp + item\n",
202
  " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '-')\n",
 
 
 
203
  "#------#\n",
204
  "\n",
205
  "if debug:\n",
 
223
  "cell_type": "code",
224
  "source": [
225
  "# @title ๐Ÿ“ Get Prompt text_encoding similarity to the pre-calc. text_encodings\n",
226
+ "prompt = \"Beyonce and Lady Gaga\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
227
  "\n",
228
  "from transformers import AutoTokenizer\n",
229
  "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
 
250
  "metadata": {
251
  "id": "xc-PbIYF428y"
252
  },
253
+ "execution_count": 9,
254
  "outputs": []
255
  },
256
  {
 
359
  ],
360
  "metadata": {
361
  "id": "ke6mZ1RZDOeB",
362
+ "outputId": "4e2c2845-a26f-4966-b560-51b2bd059192",
363
  "colab": {
364
  "base_uri": "https://localhost:8080/",
365
  "height": 1000
366
  }
367
  },
368
+ "execution_count": 11,
369
  "outputs": [
370
  {
371
  "output_type": "display_data",
 
409
  "metadata": {
410
  "id": "rebogpoyOG8k"
411
  },
412
+ "execution_count": 12,
413
  "outputs": []
414
  },
415
  {
 
417
  "source": [
418
  "# @title 🖼️ Print the results\n",
419
  "list_size = 100 # @param {type:'number'}\n",
420
+ "start_at_index = 0 # @param {type:'number'}\n",
421
  "print_Similarity = True # @param {type:\"boolean\"}\n",
422
  "print_Prompts = True # @param {type:\"boolean\"}\n",
423
  "print_Prefix = True # @param {type:\"boolean\"}\n",
 
457
  "#-------#"
458
  ],
459
  "metadata": {
460
+ "id": "JkzncP8SgKtS",
461
+ "outputId": "16a3365b-1ae4-4ce3-b4aa-9f3a03e87735",
462
+ "colab": {
463
+ "base_uri": "https://localhost:8080/"
464
+ }
465
  },
466
+ "execution_count": 13,
467
+ "outputs": [
468
+ {
469
+ "output_type": "stream",
470
+ "name": "stdout",
471
+ "text": [
472
+ "{Tabytha Christion |\n",
473
+ "Neera Stimpert |\n",
474
+ "Eris Ororke |\n",
475
+ "Meelah Onan |\n",
476
+ "Khalia Okins |\n",
477
+ "Niajah Olshan |\n",
478
+ "Aariel Gutterman |\n",
479
+ "Indyia Orsborn |\n",
480
+ "Mairin Elfstrom |\n",
481
+ "Iliani Roshan |\n",
482
+ "Sapphire Bargstadt |\n",
483
+ "Maelin Kiel |\n",
484
+ "Janeah Easterwood |\n",
485
+ "Eris Kindelan |\n",
486
+ "Zanari Guillette |\n",
487
+ "Kree Chotibai |\n",
488
+ "Khaia Dutter |\n",
489
+ "Millennia Ekman |\n",
490
+ "Saphirah Kess |\n",
491
+ "Kahlan Romanczuk |\n",
492
+ "Jaza Rowton |\n",
493
+ "Niara Bargneare |\n",
494
+ "Janiaya Gudgel |\n",
495
+ "Antigone Kiddle |\n",
496
+ "Sariana Dyreson |\n",
497
+ "Sigourney Mamula |\n",
498
+ "Starlett Guill |\n",
499
+ "Yordanos Oien |\n",
500
+ "Halia Charadan |\n",
501
+ "Nephtalie Olsby |\n",
502
+ "Cortana Bartle |\n",
503
+ "Jaeda Echof |\n",
504
+ "Esthela Khalsa |\n",
505
+ "Varya Stimits |\n",
506
+ "Amaurie Romanok |\n",
507
+ "Saran Startz |\n",
508
+ "Xitlali Killian |\n",
509
+ "Karma Marchildon |\n",
510
+ "Liberti Olivars |\n",
511
+ "Aleen Rotch |\n",
512
+ "Ryenn Stanaland |\n",
513
+ "Jarae Grun |\n",
514
+ "Daja Orsburn |\n",
515
+ "Sariana Gwyn |\n",
516
+ "Roxann Gyllensten |\n",
517
+ "Yerania Stivason |\n",
518
+ "Karmyn Oroark |\n",
519
+ "Kaiulani Gulreg |\n",
520
+ "Clary Barbre |\n",
521
+ "Zamiya Guardarrama |\n",
522
+ "Aquila Roloson |\n",
523
+ "Jashara Kernan |\n",
524
+ "Serenity Barer |\n",
525
+ "Melynna Dygert |\n",
526
+ "Chakira Olvey |\n",
527
+ "Avagrace Rought |\n",
528
+ "Halo Barne |\n",
529
+ "Malori Guljar |\n",
530
+ "Tashai Barvosa |\n",
531
+ "Rani Okeson |\n",
532
+ "Ariadne Barias |\n",
533
+ "Zoraida Romanoff |\n",
534
+ "Loghan Orozeo |\n",
535
+ "Zurii Kesler |\n",
536
+ "Aseret Battani |\n",
537
+ "Genesi Elion |\n",
538
+ "Rin Stirna |\n",
539
+ "Kitara Gutshall |\n",
540
+ "Xiamara Omo |\n",
541
+ "Zamiya Barbur |\n",
542
+ "Diem Guerrera |\n",
543
+ "Janecia Choma |\n",
544
+ "Rain Eddens |\n",
545
+ "Neela Barella |\n",
546
+ "Zhane Gulfan |\n",
547
+ "Aeva Omeara |\n",
548
+ "Krosby Barbour |\n",
549
+ "Lilith Chitu |\n",
550
+ "Zerah Rogg |\n",
551
+ "Venus Roupe |\n",
552
+ "Nuriya Ogg |\n",
553
+ "Natallie Marabella |\n",
554
+ "Amyrah Elfrink |\n",
555
+ "Hailynn Gulnaz |\n",
556
+ "Zaniah Orrick |\n",
557
+ "Naia Gunanidhi |\n",
558
+ "Tanayjah Barryman |\n",
559
+ "Noomi Stavrides |\n",
560
+ "Lanesha Groot |\n",
561
+ "Daniah Stokan |\n",
562
+ "Avaeh Chanchuan |\n",
563
+ "Miaya Groote |\n",
564
+ "Kora Chebahtah |\n",
565
+ "Jessabelle Rosenhagen |\n",
566
+ "Zyrah Guley |\n",
567
+ "Eliette Guardipee |\n",
568
+ "Myha Charleton |\n",
569
+ "Ishitha Steckelberg |\n",
570
+ "Ayrionna Rothfuss |\n",
571
+ "Honor Olan }\n"
572
+ ]
573
+ }
574
+ ]
575
  },
576
  {
577
  "cell_type": "code",
 
1448
  "base_uri": "https://localhost:8080/"
1449
  }
1450
  },
1451
+ "execution_count": null,
1452
  "outputs": [
1453
  {
1454
  "output_type": "stream",