BongoCaat committed on
Commit
316b8ea
1 Parent(s): c4356c0
Files changed (1)
  1. app.py +611 -0
app.py ADDED
@@ -0,0 +1,611 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "<a href=\"https://colab.research.google.com/github/qunash/stable-diffusion-2-gui/blob/main/stable_diffusion_2_0.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "620o1BxdNbgq"
+ },
+ "source": [
+ "# **Stable Diffusion 2.1**\n",
+ "Gradio app for [Stable Diffusion 2](https://huggingface.co/stabilityai/stable-diffusion-2) by [Stability AI](https://stability.ai/) (v2-1_768-ema-pruned.ckpt).\n",
+ "It uses the [Hugging Face](https://huggingface.co/) Diffusers🧨 implementation.\n",
+ "\n",
+ "Currently supported pipelines are `text-to-image`, `image-to-image`, `inpainting`, `4x upscaling` and `depth-to-image`.\n",
+ "\n",
+ "<br>\n",
+ "\n",
+ "Colab by [anzorq](https://twitter.com/hahahahohohe). If you like it, please consider supporting me:\n",
+ "\n",
+ "[<a href=\"https://www.buymeacoffee.com/anzorq\" target=\"_blank\"><img src=\"https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png\" height=\"32px\" width=\"108px\" alt=\"Buy Me A Coffee\"></a>](https://www.buymeacoffee.com/anzorq)\n",
+ "<br>\n",
+ "[![GitHub Repo stars](https://img.shields.io/github/stars/qunash/stable-diffusion-2-gui?style=social)](https://github.com/qunash/stable-diffusion-2-gui)\n",
+ "\n",
+ "![visitors](https://visitor-badge.glitch.me/badge?page_id=anzorq.sd-2-colab-header)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KQI4RX20DW_8"
+ },
+ "source": [
+ "# Install dependencies (~1.5 mins)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "78HoqRAB-cES",
+ "cellView": "form"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install --upgrade git+https://github.com/huggingface/diffusers.git\n",
+ "# !pip install diffusers\n",
+ "!pip install --upgrade git+https://github.com/huggingface/transformers/\n",
+ "# !pip install transformers\n",
+ "!pip install accelerate==0.12.0\n",
+ "!pip install scipy\n",
+ "!pip install ftfy\n",
+ "!pip install gradio -q\n",
+ "\n",
+ "#@markdown ### ⬅️ Run this cell\n",
+ "#@markdown ---\n",
+ "#@markdown ### Install **xformers**?\n",
+ "#@markdown This will take an additional ~3.5 mins.<br>But images will generate 25-40% faster.\n",
+ "install_xformers = False #@param {type:\"boolean\"}\n",
+ "\n",
+ "if install_xformers:\n",
+ " import os\n",
+ " from subprocess import getoutput\n",
+ "\n",
+ " os.system(\"pip install --extra-index-url https://download.pytorch.org/whl/cu113 torch torchvision==0.13.1+cu113\")\n",
+ " os.system(\"pip install triton==2.0.0.dev20220701\")\n",
+ " gpu_info = getoutput('nvidia-smi')\n",
+ " if(\"A10G\" in gpu_info):\n",
+ " os.system(f\"pip install -q https://github.com/camenduru/stable-diffusion-webui-colab/releases/download/0.0.15/xformers-0.0.15.dev0+4c06c79.d20221205-cp38-cp38-linux_x86_64.whl\")\n",
+ " elif(\"T4\" in gpu_info):\n",
+ " os.system(f\"pip install -q https://github.com/camenduru/stable-diffusion-webui-colab/releases/download/0.0.15/xformers-0.0.15.dev0+1515f77.d20221130-cp38-cp38-linux_x86_64.whl\")\n",
+ "\n",
+ "\n",
+ "# ### install xformers\n",
+ "# from IPython.utils import capture\n",
+ "# from subprocess import getoutput\n",
+ "# from re import search\n",
+ "\n",
+ "# with capture.capture_output() as cap:\n",
+ " \n",
+ "# smi_out = getoutput('nvidia-smi')\n",
+ "# supported = search('(T4|P100|V100|A100|K80)', smi_out)\n",
+ "\n",
+ "# if not supported:\n",
+ "# while True:\n",
+ "# print(\"\\x1b[1;31mThe current GPU is not supported, try starting a new session.\\x1b[0m\")\n",
+ "# else:\n",
+ "# supported = supported.group(0)\n",
+ "\n",
+ "# !pip install -q https://github.com/TheLastBen/fast-stable-diffusion/raw/main/precompiled/{supported}/xformers-0.0.13.dev0-py3-none-any.whl\n",
+ "# !pip install -q https://github.com/ShivamShrirao/xformers-wheels/releases/download/4c06c79/xformers-0.0.15.dev0+4c06c79.d20221201-cp38-cp38-linux_x86_64.whl"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OOPHNsFYDbc0"
+ },
+ "source": [
+ "# Run the app"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "gId0-asCBVwL"
+ },
+ "outputs": [],
+ "source": [
+ "#@title ⬇️🖼️\n",
+ "from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionUpscalePipeline, DiffusionPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler\n",
+ "import gradio as gr\n",
+ "import torch\n",
+ "from PIL import Image\n",
+ "import random\n",
+ "\n",
+ "state = None\n",
+ "current_steps = 25\n",
+ "attn_slicing_enabled = True\n",
+ "mem_eff_attn_enabled = install_xformers\n",
+ "\n",
+ "# model_id = 'stabilityai/stable-diffusion-2'\n",
+ "model_id = 'stabilityai/stable-diffusion-2-1'\n",
+ "\n",
+ "scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder=\"scheduler\")\n",
+ "\n",
+ "pipe = StableDiffusionPipeline.from_pretrained(\n",
+ " model_id,\n",
+ " revision=\"fp16\" if torch.cuda.is_available() else \"fp32\",\n",
+ " torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n",
+ " scheduler=scheduler\n",
+ " ).to(\"cuda\")\n",
+ "pipe.enable_attention_slicing()\n",
+ "if mem_eff_attn_enabled:\n",
+ " pipe.enable_xformers_memory_efficient_attention()\n",
+ "\n",
+ "pipe_i2i = None\n",
+ "pipe_upscale = None\n",
+ "pipe_inpaint = None\n",
+ "pipe_depth2img = None\n",
+ "\n",
+ "\n",
+ "modes = {\n",
+ " 'txt2img': 'Text to Image',\n",
+ " 'img2img': 'Image to Image',\n",
+ " 'inpaint': 'Inpainting',\n",
+ " 'upscale4x': 'Upscale 4x',\n",
+ " 'depth2img': 'Depth to Image'\n",
+ "}\n",
+ "current_mode = modes['txt2img']\n",
+ "\n",
+ "def error_str(error, title=\"Error\"):\n",
+ " return f\"\"\"#### {title}\n",
+ " {error}\"\"\" if error else \"\"\n",
+ "\n",
+ "def update_state(new_state):\n",
+ " global state\n",
+ " state = new_state\n",
+ "\n",
+ "def update_state_info(old_state):\n",
+ " if state and state != old_state:\n",
+ " return gr.update(value=state)\n",
+ "\n",
+ "def set_mem_optimizations(pipe):\n",
+ " if attn_slicing_enabled:\n",
+ " pipe.enable_attention_slicing()\n",
+ " else:\n",
+ " pipe.disable_attention_slicing()\n",
+ " \n",
+ " if mem_eff_attn_enabled:\n",
+ " pipe.enable_xformers_memory_efficient_attention()\n",
+ " else:\n",
+ " pipe.disable_xformers_memory_efficient_attention()\n",
+ "\n",
+ "def get_i2i_pipe(scheduler):\n",
+ " \n",
+ " update_state(\"Loading image to image model...\")\n",
+ "\n",
+ " pipe = StableDiffusionImg2ImgPipeline.from_pretrained(\n",
+ " model_id,\n",
+ " revision=\"fp16\" if torch.cuda.is_available() else \"fp32\",\n",
+ " torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n",
+ " scheduler=scheduler\n",
+ " )\n",
+ " set_mem_optimizations(pipe)\n",
+ " pipe.to(\"cuda\")\n",
+ " return pipe\n",
+ "\n",
+ "def get_inpaint_pipe():\n",
+ " \n",
+ " update_state(\"Loading inpainting model...\")\n",
+ "\n",
+ " pipe = DiffusionPipeline.from_pretrained(\n",
+ " \"stabilityai/stable-diffusion-2-inpainting\",\n",
+ " revision=\"fp16\" if torch.cuda.is_available() else \"fp32\",\n",
+ " torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n",
+ " # scheduler=scheduler # TODO currently setting scheduler here messes up the end result. A bug in Diffusers🧨\n",
+ " ).to(\"cuda\")\n",
+ " pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)\n",
+ " pipe.enable_attention_slicing()\n",
+ " pipe.enable_xformers_memory_efficient_attention()\n",
+ " return pipe\n",
+ "\n",
+ "def get_upscale_pipe(scheduler):\n",
+ " \n",
+ " update_state(\"Loading upscale model...\")\n",
+ "\n",
+ " pipe = StableDiffusionUpscalePipeline.from_pretrained(\n",
+ " \"stabilityai/stable-diffusion-x4-upscaler\",\n",
+ " revision=\"fp16\" if torch.cuda.is_available() else \"fp32\",\n",
+ " torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n",
+ " # scheduler=scheduler\n",
+ " )\n",
+ " # pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)\n",
+ " set_mem_optimizations(pipe)\n",
+ " pipe.to(\"cuda\")\n",
+ " return pipe\n",
+ " \n",
+ "def get_depth2img_pipe():\n",
+ " \n",
+ " update_state(\"Loading depth to image model...\")\n",
+ "\n",
+ " pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(\n",
+ " \"stabilityai/stable-diffusion-2-depth\",\n",
+ " revision=\"fp16\" if torch.cuda.is_available() else \"fp32\",\n",
+ " torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n",
+ " # scheduler=scheduler\n",
+ " )\n",
+ " pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)\n",
+ " set_mem_optimizations(pipe)\n",
+ " pipe.to(\"cuda\")\n",
+ " return pipe\n",
+ "\n",
+ "def switch_attention_slicing(attn_slicing):\n",
+ " global attn_slicing_enabled\n",
+ " attn_slicing_enabled = attn_slicing\n",
+ "\n",
+ "def switch_mem_eff_attn(mem_eff_attn):\n",
+ " global mem_eff_attn_enabled\n",
+ " mem_eff_attn_enabled = mem_eff_attn\n",
+ "\n",
+ "def pipe_callback(step: int, timestep: int, latents: torch.FloatTensor):\n",
+ " update_state(f\"{step}/{current_steps} steps\")#\\nTime left, sec: {timestep/100:.0f}\")\n",
+ "\n",
+ "def inference(inf_mode, prompt, n_images, guidance, steps, width=768, height=768, seed=0, img=None, strength=0.5, neg_prompt=\"\"):\n",
+ "\n",
+ " update_state(\" \")\n",
+ "\n",
+ " global current_mode\n",
+ " if inf_mode != current_mode:\n",
+ " pipe.to(\"cuda\" if inf_mode == modes['txt2img'] else \"cpu\")\n",
+ "\n",
+ " if pipe_i2i is not None:\n",
+ " pipe_i2i.to(\"cuda\" if inf_mode == modes['img2img'] else \"cpu\")\n",
+ "\n",
+ " if pipe_inpaint is not None:\n",
+ " pipe_inpaint.to(\"cuda\" if inf_mode == modes['inpaint'] else \"cpu\")\n",
+ "\n",
+ " if pipe_upscale is not None:\n",
+ " pipe_upscale.to(\"cuda\" if inf_mode == modes['upscale4x'] else \"cpu\")\n",
+ " \n",
+ " if pipe_depth2img is not None:\n",
+ " pipe_depth2img.to(\"cuda\" if inf_mode == modes['depth2img'] else \"cpu\")\n",
+ "\n",
+ " current_mode = inf_mode\n",
+ " \n",
+ " if seed == 0:\n",
+ " seed = random.randint(0, 2147483647)\n",
+ "\n",
+ " generator = torch.Generator('cuda').manual_seed(seed)\n",
+ " prompt = prompt\n",
+ "\n",
+ " try:\n",
+ " \n",
+ " if inf_mode == modes['txt2img']:\n",
+ " return txt_to_img(prompt, n_images, neg_prompt, guidance, steps, width, height, generator, seed), gr.update(visible=False, value=None)\n",
+ " \n",
+ " elif inf_mode == modes['img2img']:\n",
+ " if img is None:\n",
+ " return None, gr.update(visible=True, value=error_str(\"Image is required for Image to Image mode\"))\n",
+ "\n",
+ " return img_to_img(prompt, n_images, neg_prompt, img, strength, guidance, steps, width, height, generator, seed), gr.update(visible=False, value=None)\n",
+ " \n",
+ " elif inf_mode == modes['inpaint']:\n",
+ " if img is None:\n",
+ " return None, gr.update(visible=True, value=error_str(\"Image is required for Inpainting mode\"))\n",
+ "\n",
+ " return inpaint(prompt, n_images, neg_prompt, img, guidance, steps, width, height, generator, seed), gr.update(visible=False, value=None)\n",
+ "\n",
+ " elif inf_mode == modes['upscale4x']:\n",
+ " if img is None:\n",
+ " return None, gr.update(visible=True, value=error_str(\"Image is required for Upscale mode\"))\n",
+ "\n",
+ " return upscale(prompt, n_images, neg_prompt, img, guidance, steps, generator), gr.update(visible=False, value=None)\n",
+ "\n",
+ " elif inf_mode == modes['depth2img']:\n",
+ " if img is None:\n",
+ " return None, gr.update(visible=True, value=error_str(\"Image is required for Depth to Image mode\"))\n",
+ "\n",
+ " return depth2img(prompt, n_images, neg_prompt, img, guidance, steps, generator, seed), gr.update(visible=False, value=None)\n",
+ "\n",
+ " except Exception as e:\n",
+ " return None, gr.update(visible=True, value=error_str(e))\n",
+ "\n",
+ "def txt_to_img(prompt, n_images, neg_prompt, guidance, steps, width, height, generator, seed):\n",
+ "\n",
+ " result = pipe(\n",
+ " prompt,\n",
+ " num_images_per_prompt = n_images,\n",
+ " negative_prompt = neg_prompt,\n",
+ " num_inference_steps = int(steps),\n",
+ " guidance_scale = guidance,\n",
+ " width = width,\n",
+ " height = height,\n",
+ " generator = generator,\n",
+ " callback=pipe_callback).images\n",
+ "\n",
+ " update_state(f\"Done. Seed: {seed}\")\n",
+ "\n",
+ " return result\n",
+ "\n",
+ "def img_to_img(prompt, n_images, neg_prompt, img, strength, guidance, steps, width, height, generator, seed):\n",
+ "\n",
+ " global pipe_i2i\n",
+ " if pipe_i2i is None:\n",
+ " pipe_i2i = get_i2i_pipe(scheduler)\n",
+ "\n",
+ " img = img['image']\n",
+ " ratio = min(height / img.height, width / img.width)\n",
+ " img = img.resize((int(img.width * ratio), int(img.height * ratio)), Image.LANCZOS)\n",
+ " result = pipe_i2i(\n",
+ " prompt,\n",
+ " num_images_per_prompt = n_images,\n",
+ " negative_prompt = neg_prompt,\n",
+ " image = img,\n",
+ " num_inference_steps = int(steps),\n",
+ " strength = strength,\n",
+ " guidance_scale = guidance,\n",
+ " # width = width,\n",
+ " # height = height,\n",
+ " generator = generator,\n",
+ " callback=pipe_callback).images\n",
+ "\n",
+ " update_state(f\"Done. Seed: {seed}\")\n",
+ " \n",
+ " return result\n",
+ "\n",
+ "# TODO Currently supports only 512x512 images\n",
+ "def inpaint(prompt, n_images, neg_prompt, img, guidance, steps, width, height, generator, seed):\n",
+ "\n",
+ " global pipe_inpaint\n",
+ " if pipe_inpaint is None:\n",
+ " pipe_inpaint = get_inpaint_pipe()\n",
+ "\n",
+ " inp_img = img['image']\n",
+ " mask = img['mask']\n",
+ " inp_img = square_padding(inp_img)\n",
+ " mask = square_padding(mask)\n",
+ "\n",
+ " # # ratio = min(height / inp_img.height, width / inp_img.width)\n",
+ " # ratio = min(512 / inp_img.height, 512 / inp_img.width)\n",
+ " # inp_img = inp_img.resize((int(inp_img.width * ratio), int(inp_img.height * ratio)), Image.LANCZOS)\n",
+ " # mask = mask.resize((int(mask.width * ratio), int(mask.height * ratio)), Image.LANCZOS)\n",
+ "\n",
+ " inp_img = inp_img.resize((512, 512))\n",
+ " mask = mask.resize((512, 512))\n",
+ "\n",
+ " result = pipe_inpaint(\n",
+ " prompt,\n",
+ " image = inp_img,\n",
+ " mask_image = mask,\n",
+ " num_images_per_prompt = n_images,\n",
+ " negative_prompt = neg_prompt,\n",
+ " num_inference_steps = int(steps),\n",
+ " guidance_scale = guidance,\n",
+ " # width = width,\n",
+ " # height = height,\n",
+ " generator = generator,\n",
+ " callback=pipe_callback).images\n",
+ " \n",
+ " update_state(f\"Done. Seed: {seed}\")\n",
+ "\n",
+ " return result\n",
+ "\n",
+ "def depth2img(prompt, n_images, neg_prompt, img, guidance, steps, generator, seed):\n",
+ "\n",
+ " global pipe_depth2img\n",
+ " if pipe_depth2img is None:\n",
+ " pipe_depth2img = get_depth2img_pipe()\n",
+ "\n",
+ " img = img['image']\n",
+ " result = pipe_depth2img(\n",
+ " prompt,\n",
+ " num_images_per_prompt = n_images,\n",
+ " negative_prompt = neg_prompt,\n",
+ " image = img,\n",
+ " num_inference_steps = int(steps),\n",
+ " guidance_scale = guidance,\n",
+ " # width = width,\n",
+ " # height = height,\n",
+ " generator = generator,\n",
+ " callback=pipe_callback).images\n",
+ "\n",
+ " update_state(f\"Done. Seed: {seed}\")\n",
+ " \n",
+ " return result\n",
+ "\n",
+ "def square_padding(img):\n",
+ " width, height = img.size\n",
+ " if width == height:\n",
+ " return img\n",
+ " new_size = max(width, height)\n",
+ " new_img = Image.new('RGB', (new_size, new_size), (0, 0, 0, 255))\n",
+ " new_img.paste(img, ((new_size - width) // 2, (new_size - height) // 2))\n",
+ " return new_img\n",
+ "\n",
+ "def upscale(prompt, n_images, neg_prompt, img, guidance, steps, generator):\n",
+ "\n",
+ " global pipe_upscale\n",
+ " if pipe_upscale is None:\n",
+ " pipe_upscale = get_upscale_pipe(scheduler)\n",
+ "\n",
+ " img = img['image']\n",
+ " return upscale_tiling(prompt, neg_prompt, img, guidance, steps, generator)\n",
+ "\n",
+ " # result = pipe_upscale(\n",
+ " # prompt,\n",
+ " # image = img,\n",
+ " # num_inference_steps = int(steps),\n",
+ " # guidance_scale = guidance,\n",
+ " # negative_prompt = neg_prompt,\n",
+ " # num_images_per_prompt = n_images,\n",
+ " # generator = generator).images[0]\n",
+ "\n",
+ " # return result\n",
+ "\n",
+ "def upscale_tiling(prompt, neg_prompt, img, guidance, steps, generator):\n",
+ "\n",
+ " width, height = img.size\n",
+ "\n",
+ " # calculate the padding needed to make the image dimensions a multiple of 128\n",
+ " padding_x = 128 - (width % 128) if width % 128 != 0 else 0\n",
+ " padding_y = 128 - (height % 128) if height % 128 != 0 else 0\n",
+ "\n",
+ " # create a white image of the right size to be used as padding\n",
+ " padding_img = Image.new('RGB', (padding_x, padding_y), color=(255, 255, 255, 0))\n",
+ "\n",
+ " # paste the padding image onto the original image to add the padding\n",
+ " img.paste(padding_img, (width, height))\n",
+ "\n",
+ " # update the image dimensions to include the padding\n",
+ " width += padding_x\n",
+ " height += padding_y\n",
+ "\n",
+ " if width > 128 or height > 128:\n",
+ "\n",
+ " num_tiles_x = int(width / 128)\n",
+ " num_tiles_y = int(height / 128)\n",
+ "\n",
+ " upscaled_img = Image.new('RGB', (img.size[0] * 4, img.size[1] * 4))\n",
+ " for x in range(num_tiles_x):\n",
+ " for y in range(num_tiles_y):\n",
+ " update_state(f\"Upscaling tile {x * num_tiles_y + y + 1}/{num_tiles_x * num_tiles_y}\")\n",
+ " tile = img.crop((x * 128, y * 128, (x + 1) * 128, (y + 1) * 128))\n",
+ "\n",
+ " upscaled_tile = pipe_upscale(\n",
+ " prompt=\"\",\n",
+ " image=tile,\n",
+ " num_inference_steps=steps,\n",
+ " guidance_scale=guidance,\n",
+ " # negative_prompt = neg_prompt,\n",
+ " generator=generator,\n",
+ " ).images[0]\n",
+ "\n",
+ " upscaled_img.paste(upscaled_tile, (x * upscaled_tile.size[0], y * upscaled_tile.size[1]))\n",
+ "\n",
+ " return [upscaled_img]\n",
+ " else:\n",
+ " return pipe_upscale(\n",
+ " prompt=prompt,\n",
+ " image=img,\n",
+ " num_inference_steps=steps,\n",
+ " guidance_scale=guidance,\n",
+ " negative_prompt = neg_prompt,\n",
+ " generator=generator,\n",
+ " ).images\n",
+ "\n",
+ "\n",
+ "\n",
+ "def on_mode_change(mode):\n",
+ " return gr.update(visible = mode in (modes['img2img'], modes['inpaint'], modes['upscale4x'], modes['depth2img'])), \\\n",
+ " gr.update(visible = mode == modes['inpaint']), \\\n",
+ " gr.update(visible = mode == modes['upscale4x']), \\\n",
+ " gr.update(visible = mode == modes['img2img'])\n",
+ "\n",
+ "def on_steps_change(steps):\n",
+ " global current_steps\n",
+ " current_steps = steps\n",
+ "\n",
+ "css = \"\"\".main-div div{display:inline-flex;align-items:center;gap:.8rem;font-size:1.75rem}.main-div div h1{font-weight:900;margin-bottom:7px}.main-div p{margin-bottom:10px;font-size:94%}a{text-decoration:underline}.tabs{margin-top:0;margin-bottom:0}#gallery{min-height:20rem}\n",
+ "\"\"\"\n",
+ "with gr.Blocks(css=css) as demo:\n",
+ " gr.HTML(\n",
+ " f\"\"\"\n",
+ " <div class=\"main-div\">\n",
+ " <div>\n",
+ " <h1>Stable Diffusion 2.1</h1>\n",
+ " </div><br>\n",
+ " <p> Model used: <a href=\"https://huggingface.co/stabilityai/stable-diffusion-2-1/blob/main/v2-1_768-ema-pruned.ckpt\" target=\"_blank\">v2-1_768-ema-pruned.ckpt</a></p>\n",
+ " Running on <b>{\"GPU 🔥\" if torch.cuda.is_available() else \"CPU 🥶\"}</b>\n",
+ " </div>\n",
+ " \"\"\"\n",
+ " )\n",
+ " with gr.Row():\n",
+ " \n",
+ " with gr.Column(scale=70):\n",
+ " with gr.Group():\n",
+ " with gr.Row():\n",
+ " prompt = gr.Textbox(label=\"Prompt\", show_label=False, max_lines=2,placeholder=f\"Enter prompt\").style(container=False)\n",
+ " generate = gr.Button(value=\"Generate\").style(rounded=(False, True, True, False))\n",
+ "\n",
+ " gallery = gr.Gallery(label=\"Generated images\", show_label=False).style(grid=[2], height=\"auto\")\n",
+ " state_info = gr.Textbox(label=\"State\", show_label=False, max_lines=2).style(container=False)\n",
+ " error_output = gr.Markdown(visible=False)\n",
+ "\n",
+ " with gr.Column(scale=30):\n",
+ " inf_mode = gr.Radio(label=\"Inference Mode\", choices=list(modes.values()), value=modes['txt2img'])\n",
+ " \n",
+ " with gr.Group(visible=False) as i2i_options:\n",
+ " image = gr.Image(label=\"Image\", height=128, type=\"pil\", tool='sketch')\n",
+ " inpaint_info = gr.Markdown(\"Inpainting resizes and pads images to 512x512\", visible=False)\n",
+ " upscale_info = gr.Markdown(\"\"\"Best for small images (128x128 or smaller).<br>\n",
+ " Bigger images will be sliced into 128x128 tiles which will be upscaled individually.<br>\n",
+ " This is done to avoid running out of GPU memory.\"\"\", visible=False)\n",
+ " strength = gr.Slider(label=\"Transformation strength\", minimum=0, maximum=1, step=0.01, value=0.5)\n",
+ "\n",
+ " with gr.Group():\n",
+ " neg_prompt = gr.Textbox(label=\"Negative prompt\", placeholder=\"What to exclude from the image\")\n",
+ "\n",
+ " n_images = gr.Slider(label=\"Number of images\", value=1, minimum=1, maximum=4, step=1)\n",
+ " with gr.Row():\n",
+ " guidance = gr.Slider(label=\"Guidance scale\", value=7.5, maximum=15)\n",
+ " steps = gr.Slider(label=\"Steps\", value=current_steps, minimum=2, maximum=100, step=1)\n",
+ "\n",
+ " with gr.Row():\n",
+ " width = gr.Slider(label=\"Width\", value=768, minimum=64, maximum=1024, step=8)\n",
+ " height = gr.Slider(label=\"Height\", value=768, minimum=64, maximum=1024, step=8)\n",
+ "\n",
+ " seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)\n",
+ " with gr.Accordion(\"Memory optimization\"):\n",
+ " attn_slicing = gr.Checkbox(label=\"Attention slicing (a bit slower, but uses less memory)\", value=attn_slicing_enabled)\n",
+ " # mem_eff_attn = gr.Checkbox(label=\"Memory efficient attention (xformers)\", value=mem_eff_attn_enabled)\n",
+ "\n",
+ " inf_mode.change(on_mode_change, inputs=[inf_mode], outputs=[i2i_options, inpaint_info, upscale_info, strength], queue=False)\n",
+ " steps.change(on_steps_change, inputs=[steps], outputs=[], queue=False)\n",
+ " attn_slicing.change(lambda x: switch_attention_slicing(x), inputs=[attn_slicing], queue=False)\n",
+ " # mem_eff_attn.change(lambda x: switch_mem_eff_attn(x), inputs=[mem_eff_attn], queue=False)\n",
+ "\n",
+ " inputs = [inf_mode, prompt, n_images, guidance, steps, width, height, seed, image, strength, neg_prompt]\n",
+ " outputs = [gallery, error_output]\n",
+ " prompt.submit(inference, inputs=inputs, outputs=outputs)\n",
+ " generate.click(inference, inputs=inputs, outputs=outputs)\n",
+ "\n",
+ " demo.load(update_state_info, inputs=state_info, outputs=state_info, every=0.5, show_progress=False)\n",
+ "\n",
+ " gr.HTML(\"\"\"\n",
+ " <div style=\"border-top: 1px solid #303030;\">\n",
+ " <br>\n",
+ " <p>Space by: <a href=\"https://twitter.com/hahahahohohe\"><img src=\"https://img.shields.io/twitter/follow/hahahahohohe?label=%40anzorq&style=social\" alt=\"Twitter Follow\"></a></p><br>\n",
+ " <p>Enjoying this app? Please consider <a href=\"https://www.buymeacoffee.com/anzorq\">supporting me</a></p>\n",
+ " <a href=\"https://www.buymeacoffee.com/anzorq\" target=\"_blank\"><img src=\"https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png\" alt=\"Buy Me A Coffee\" style=\"height: 45px !important;width: 162px !important;\" ></a><br><br>\n",
+ " <a href=\"https://github.com/qunash/stable-diffusion-2-gui\" target=\"_blank\"><img alt=\"GitHub Repo stars\" src=\"https://img.shields.io/github/stars/qunash/stable-diffusion-2-gui?style=social\"></a>\n",
+ " <p><img src=\"https://visitor-badge.glitch.me/badge?page_id=anzorq.sd-2-colab\" alt=\"visitors\"></p>\n",
+ " </div>\n",
+ " \"\"\")\n",
+ "\n",
+ "demo.queue()\n",
+ "demo.launch(debug=True, share=True, height=768)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "private_outputs": true,
+ "provenance": [],
+ "toc_visible": true,
+ "include_colab_link": true
+ },
+ "gpuClass": "standard",
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+ }
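For reference, the text-to-image path that the committed notebook wires into Gradio boils down to a short standalone script. The sketch below mirrors the model id, scheduler, and pipeline calls from the cell above; the prompt, seed, and output filename are illustrative assumptions, not part of the commit.

# Minimal standalone sketch of the notebook's txt2img path (diffusers + DPMSolverMultistepScheduler).
# Model id and API calls follow the committed code; prompt, seed, and filename are placeholders.
import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler

model_id = "stabilityai/stable-diffusion-2-1"
device = "cuda" if torch.cuda.is_available() else "cpu"

scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    scheduler=scheduler,
).to(device)
pipe.enable_attention_slicing()  # same memory optimization the app enables by default

generator = torch.Generator(device).manual_seed(42)  # placeholder seed
image = pipe(
    "a photo of an astronaut riding a horse",  # placeholder prompt
    negative_prompt="",
    num_inference_steps=25,
    guidance_scale=7.5,
    width=768,
    height=768,
    generator=generator,
).images[0]
image.save("out.png")  # placeholder output path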