.gitattributes CHANGED
@@ -1,5 +1,4 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.gif filter=lfs diff=lfs merge=lfs -text
3
  *.arrow filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
 
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
20221109.3a1e97df21bbdb63.gif DELETED

Git LFS Details

  • SHA256: e35bfc3aa454944634194b8c694c25003d9521128067e17e8e522ecb02d824b5
  • Pointer size: 133 Bytes
  • Size of remote file: 22.9 MB
app.py CHANGED
@@ -201,7 +201,7 @@ def pad(img, size=(128, 128), tosize=(512, 512), border=1):
201
  mask.paste(white, tc)
202
 
203
  if 'A' in rimg.getbands():
204
- mask.paste(rimg.getchannel('A'), tc)
205
  return new_img, mask
206
 
207
 
@@ -218,33 +218,12 @@ def img_to_b64(img):
218
  class Predictor:
219
  def __init__(self):
220
  """Load the model into memory to make running multiple predictions efficient"""
221
- self.models = {
222
- "places2": Inpainter(
223
- network_pkl='models/Places_512_FullData.pkl',
224
- resolution=512,
225
- truncation_psi=1.,
226
- noise_mode='const',
227
- ),
228
- "places2+laion300k": Inpainter(
229
- network_pkl='models/Places_512_FullData+LAION300k.pkl',
230
- resolution=512,
231
- truncation_psi=1.,
232
- noise_mode='const',
233
- ),
234
- "places2+laion300k+laion300k(opmasked)": Inpainter(
235
- network_pkl='models/Places_512_FullData+LAION300k+OPM300k.pkl',
236
- resolution=512,
237
- truncation_psi=1.,
238
- noise_mode='const',
239
- ),
240
- "places2+laion300k+laion1200k(opmasked)": Inpainter(
241
- network_pkl='models/Places_512_FullData+LAION300k+OPM1200k.pkl',
242
- resolution=512,
243
- truncation_psi=1.,
244
- noise_mode='const',
245
- ),
246
-
247
- }
248
 
249
  # The arguments and types the model takes as input
250
 
@@ -255,7 +234,6 @@ class Predictor:
255
  border=5,
256
  seed=42,
257
  size=0.5,
258
- model='places2',
259
  ) -> Image:
260
  i, m = pad(
261
  img,
@@ -264,7 +242,7 @@ class Predictor:
264
  border=border
265
  )
266
  """Run a single prediction on the model"""
267
- imgs = self.models[model].generate_images2(
268
  dpath=[i.resize((512, 512), resample=Image.Resampling.NEAREST)],
269
  mpath=[m.resize((512, 512), resample=Image.Resampling.NEAREST)],
270
  seed=seed,
@@ -281,148 +259,48 @@ class Predictor:
281
  1-(np.array(m) / 255)
282
  )
283
  minpainted = mask_to_alpha(inpainted, m)
284
- return inpainted, minpainted, ImageOps.invert(m)
285
 
286
- def predict_tiled(
287
- self,
288
- img: Image.Image,
289
- tosize=(512, 512),
290
- border=5,
291
- seed=42,
292
- size=0.5,
293
- model='places2',
294
- ) -> Image:
295
 
296
- i, morig = pad(
297
- img,
298
- size=size, # (328, 328),
299
- tosize=tosize,
300
- border=border
301
- )
302
- i.putalpha(morig)
303
- img = i
304
- # img.save('0.png')
305
- assert img.width == img.height
306
- assert img.width > 512 and img.width <= 512*2
307
-
308
- def tile_coords(image, n=2, tile_size=512):
309
- assert image.width == image.height
310
- offsets = np.linspace(0, image.width - tile_size, n).astype(int)
311
- for i in range(n):
312
- for j in range(n):
313
- left = offsets[j]
314
- upper = offsets[i]
315
- right = left + tile_size
316
- lower = upper + tile_size
317
- # tile = image.crop((left, upper, right, lower))
318
- yield [left, upper, right, lower]
319
-
320
- for ix, tc in enumerate(tile_coords(img, n=2)):
321
- i = img.crop(tc)
322
- # i.save(f't{ix}.png')
323
- m = i.getchannel('A')
324
-
325
- """Run a single prediction on the model"""
326
- imgs = self.models[model].generate_images2(
327
- dpath=[i.resize((512, 512), resample=Image.Resampling.NEAREST)],
328
- mpath=[m.resize((512, 512), resample=Image.Resampling.NEAREST)],
329
- seed=seed,
330
- )
331
- img_op_raw = imgs[0].convert('RGBA')
332
- # img_op_raw = img_op_raw.resize(tosize, resample=Image.Resampling.NEAREST)
333
- inpainted = img_op_raw.copy()
334
-
335
- # paste original image to remove inpainting/scaling artifacts
336
- inpainted = blend(
337
- i,
338
- inpainted,
339
- 1-(np.array(m) / 255)
340
- )
341
- # inpainted.save(f't{ix}_op.png')
342
- minpainted = mask_to_alpha(inpainted, m)
343
- # continue with partially inpainted image
344
- # since the tiles overlap, the next tile will contain (possibly inpainted) parts of the previous tile
345
- img.paste(inpainted, tc)
346
-
347
- # restore original alpha channel
348
- img.putalpha(morig)
349
- return img.convert('RGB'), img, ImageOps.invert(img.getchannel('A'))
350
  predictor = Predictor()
351
 
352
  # %%
353
 
354
 
355
- def _outpaint(img, tosize, border, seed, size, model, tiled):
356
- if tiled:
357
- img_op = predictor.predict_tiled(
358
- img,
359
- border=border,
360
- seed=seed,
361
- tosize=(tosize, tosize),
362
- size=float(size),
363
- model=model,
364
- )
365
- else:
366
- img_op = predictor.predict(
367
- img,
368
- border=border,
369
- seed=seed,
370
- tosize=(tosize, tosize),
371
- size=float(size),
372
- model=model,
373
- )
374
  return img_op
375
  # %%
376
 
377
 
378
- with gr.Blocks() as demo:
379
- maturl = 'https://github.com/fenglinglwb/MAT'
380
- gr.Markdown(f'''
381
- # MAT Primer for Stable Diffusion
382
- ## based on MAT: Mask-Aware Transformer for Large Hole Image Inpainting
383
- ### create a primer for use in stable diffusion outpainting
384
-
385
- i have added 2 example scripts to the repo:
386
- - outpainting_example1.py using the inpainting pipeline
387
- - outpainting_example2.py using the img2img pipeline. this is basically what i used for the examples below
388
- ''')
389
-
390
- gr.HTML(f'''<a href="{maturl}">{maturl}</a>''')
391
- with gr.Box():
392
- with gr.Row():
393
- gr.Markdown(f"""example with strength 0.5""")
394
- with gr.Row():
395
- gr.HTML("<img src='file/hild.gif'> ")
396
- gr.HTML("<img src='file/process.gif'>")
397
- gr.HTML("<img src='file/flagscapes.gif'>")
398
- btn = gr.Button("Run", variant="primary")
399
- with gr.Row():
400
- with gr.Column():
401
- searchimage = gc.Image(label="image", type='pil', image_mode='RGBA')
402
- to_size = gc.Slider(1, 1920, 512, step=1, label='output size')
403
- border = gc.Slider(1, 50, 0, step=1, label='border to crop from the image before outpainting')
404
- seed = gc.Slider(1, 65536, 10, step=1, label='seed')
405
- size = gc.Slider(0, 1, .5, step=0.01,label='scale of the image before outpainting')
406
- tiled = gc.Checkbox(label='tiled: run the network with 4 tiles of size 512x512 . only usable if output size >512 and <=1024', value=False)
407
-
408
- model = gc.Dropdown(
409
- choices=['places2',
410
- 'places2+laion300k',
411
- 'places2+laion300k+laion300k(opmasked)',
412
- 'places2+laion300k+laion1200k(opmasked)'],
413
- value='places2+laion300k+laion1200k(opmasked)',
414
- label='model',
415
- )
416
- with gr.Column():
417
- outwithoutalpha = gc.Image(label="primed image without alpha channel", type='pil', image_mode='RGBA')
418
- mask = gc.Image(label="outpainting mask", type='pil')
419
- out = gc.Image(label="primed image with alpha channel",type='pil', image_mode='RGBA')
420
 
421
- btn.click(
422
- fn=_outpaint,
423
- inputs=[searchimage, to_size, border, seed, size, model,tiled],
424
- outputs=[outwithoutalpha, out, mask])
425
 
426
 
427
- # %% launch
428
- demo.launch()
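For reference, the removed tiled path above splits the padded square image into four overlapping 512x512 crops before running the network on each one. A small sketch of the offset arithmetic, assuming a hypothetical 768x768 padded image (which satisfies the `512 < width <= 1024` constraint the removed code asserts):

```python
import numpy as np

width, tile_size, n = 768, 512, 2
offsets = np.linspace(0, width - tile_size, n).astype(int)   # -> [0, 256]
boxes = [(left, upper, left + tile_size, upper + tile_size)
         for upper in offsets for left in offsets]
# four crops: (0,0,512,512), (256,0,768,512), (0,256,512,768), (256,256,768,768)
# adjacent tiles overlap by 256 px, so each later tile re-sees already inpainted pixels
```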
 
201
  mask.paste(white, tc)
202
 
203
  if 'A' in rimg.getbands():
204
+ mask.paste(img.getchannel('A'), tc)
205
  return new_img, mask
206
 
207
 
 
218
  class Predictor:
219
  def __init__(self):
220
  """Load the model into memory to make running multiple predictions efficient"""
221
+ self.model = Inpainter(
222
+ network_pkl='models/Places_512_FullData.pkl',
223
+ resolution=512,
224
+ truncation_psi=1.,
225
+ noise_mode='const',
226
+ )
227
 
228
  # The arguments and types the model takes as input
229
 
 
234
  border=5,
235
  seed=42,
236
  size=0.5,
 
237
  ) -> Image:
238
  i, m = pad(
239
  img,
 
242
  border=border
243
  )
244
  """Run a single prediction on the model"""
245
+ imgs = self.model.generate_images2(
246
  dpath=[i.resize((512, 512), resample=Image.Resampling.NEAREST)],
247
  mpath=[m.resize((512, 512), resample=Image.Resampling.NEAREST)],
248
  seed=seed,
 
259
  1-(np.array(m) / 255)
260
  )
261
  minpainted = mask_to_alpha(inpainted, m)
262
+ return minpainted, inpainted, ImageOps.invert(m)
263
 
264
 
265
  predictor = Predictor()
266
 
267
  # %%
268
 
269
 
270
+ def _outpaint(img, tosize, border, seed, size):
271
+ img_op = predictor.predict(
272
+ img,
273
+ border=border,
274
+ seed=seed,
275
+ tosize=(tosize, tosize),
276
+ size=float(size)
277
+ )
278
  return img_op
279
  # %%
280
 
281
 
282
+ searchimage = gc.Image(shape=(224, 224), label="image", type='pil')
283
+ to_size = gc.Slider(1, 1920, 512, step=1, label='output size')
284
+ border = gc.Slider(
285
+ 1, 50, 0, step=1, label='border to crop from the image before outpainting')
286
+ seed = gc.Slider(1, 65536, 10, step=1, label='seed')
287
+ size = gc.Slider(0, 1, .5, step=0.01,
288
+ label='scale of the image before outpainting')
289
+
290
+ out = gc.Image(label="primed image with alpha channel", type='pil')
291
+ outwithoutalpha = gc.Image(
292
+ label="primed image without alpha channel", type='pil')
293
+ mask = gc.Image(label="outpainting mask", type='pil')
294
 
295
+ maturl = 'https://github.com/fenglinglwb/MAT'
296
+ gr.Interface(
297
+ _outpaint,
298
+ [searchimage, to_size, border, seed, size],
299
+ [out, outwithoutalpha, mask],
300
+ title=f"MAT Primer for Stable Diffusion\n\nbased on MAT: Mask-Aware Transformer for Large Hole Image Inpainting\n\n{maturl}",
301
+ description=f"create an outpainting primer for use in stable diffusion outpainting",
302
+ analytics_enabled=False,
303
+ allow_flagging='never',
304
 
305
 
306
+ ).launch()
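A minimal sketch of driving the simplified predictor without the Gradio UI, assuming the `# %%` cells above were executed interactively so `predictor` and `_outpaint` exist, and using a hypothetical `photo.png` as input; the argument values mirror the slider defaults:

```python
from PIL import Image

img = Image.open('photo.png').convert('RGBA')

# same call the interface makes; returns (primed image with alpha channel,
# primed image without alpha channel, outpainting mask)
with_alpha, without_alpha, mask = _outpaint(
    img,
    tosize=512,   # output size
    border=0,     # border to crop from the image before outpainting
    seed=10,
    size=0.5,     # scale of the image before outpainting
)
with_alpha.save('primed_image_with_alpha_channel.png')
```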
 
bread.gif DELETED

Git LFS Details

  • SHA256: 107e8adb6adb52d59cdc9c66e8306c05d4deb17f1fc24c4bc4196d4337b18d92
  • Pointer size: 133 Bytes
  • Size of remote file: 22.8 MB
flagscapes.gif DELETED

Git LFS Details

  • SHA256: 51ab26dfe1543c2418254bdab15ffb5081b2fa39a80031fb2511e7bba122b055
  • Pointer size: 133 Bytes
  • Size of remote file: 24.5 MB
generate_image.py ADDED
@@ -0,0 +1,162 @@
1
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ """Generate images using pretrained network pickle."""
10
+ from PIL import Image
11
+ from cog import BasePredictor, Input, Path
12
+ from networks.mat import Generator
13
+ import legacy
14
+ import torch.nn.functional as F
15
+ import torch
16
+ import PIL.Image
17
+ import numpy as np
18
+ import dnnlib
19
+ import click
20
+ from typing import List, Optional
21
+ import random
22
+ import re
23
+ import os
24
+ import glob
25
+ import cv2
26
+ pyspng = None
27
+
28
+
29
+ def num_range(s: str) -> List[int]:
30
+ '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.'''
31
+
32
+ range_re = re.compile(r'^(\d+)-(\d+)$')
33
+ m = range_re.match(s)
34
+ if m:
35
+ return list(range(int(m.group(1)), int(m.group(2))+1))
36
+ vals = s.split(',')
37
+ return [int(x) for x in vals]
38
+
39
+
40
+ def copy_params_and_buffers(src_module, dst_module, require_all=False):
41
+ assert isinstance(src_module, torch.nn.Module)
42
+ assert isinstance(dst_module, torch.nn.Module)
43
+ src_tensors = {name: tensor for name,
44
+ tensor in named_params_and_buffers(src_module)}
45
+ for name, tensor in named_params_and_buffers(dst_module):
46
+ assert (name in src_tensors) or (not require_all)
47
+ if name in src_tensors:
48
+ tensor.copy_(src_tensors[name].detach()).requires_grad_(
49
+ tensor.requires_grad)
50
+
51
+
52
+ def params_and_buffers(module):
53
+ assert isinstance(module, torch.nn.Module)
54
+ return list(module.parameters()) + list(module.buffers())
55
+
56
+
57
+ def named_params_and_buffers(module):
58
+ assert isinstance(module, torch.nn.Module)
59
+ return list(module.named_parameters()) + list(module.named_buffers())
60
+
61
+
62
+
63
+ class Inpainter:
64
+ def __init__(self,
65
+ network_pkl,
66
+ resolution=512,
67
+ truncation_psi=1,
68
+ noise_mode='const',
69
+ sdevice='cpu'
70
+ ):
71
+ self.resolution = resolution
72
+ self.truncation_psi = truncation_psi
73
+ self.noise_mode = noise_mode
74
+ print(f'Loading networks from: {network_pkl}')
75
+ self.device = torch.device(sdevice)
76
+ with dnnlib.util.open_url(network_pkl) as f:
77
+ G_saved = legacy.load_network_pkl(f)['G_ema'].to(
78
+ self.device).eval().requires_grad_(False)  # type: ignore
79
+ net_res = 512 if resolution > 512 else resolution
80
+ self.G = Generator(
81
+ z_dim=512,
82
+ c_dim=0,
83
+ w_dim=512,
84
+ img_resolution=net_res,
85
+ img_channels=3
86
+ ).to(self.device).eval().requires_grad_(False)
87
+ copy_params_and_buffers(G_saved, self.G, require_all=True)
88
+
89
+ def generate_images2(
90
+ self,
91
+ dpath: List[PIL.Image.Image],
92
+ mpath: List[Optional[PIL.Image.Image]],
93
+ seed: int = 42,
94
+ ):
95
+ """
96
+ Generate images using pretrained network pickle.
97
+ """
98
+ resolution = self.resolution
99
+ truncation_psi = self.truncation_psi
100
+ noise_mode = self.noise_mode
101
+ # seed = 240 # pick up a random number
102
+ def seed_all(seed):
103
+ random.seed(seed)
104
+ np.random.seed(seed)
105
+ torch.manual_seed(seed)
106
+ torch.cuda.manual_seed(seed)
107
+ if seed is not None:
108
+ seed_all(seed)
109
+
110
+ # no Labels.
111
+ label = torch.zeros([1, self.G.c_dim], device=self.device)
112
+
113
+ def read_image(image):
114
+ image = np.array(image)
115
+ if image.ndim == 2:
116
+ image = image[:, :, np.newaxis] # HW => HWC
117
+ image = np.repeat(image, 3, axis=2)
118
+ image = image.transpose(2, 0, 1) # HWC => CHW
119
+ image = image[:3]
120
+ return image
121
+ if resolution != 512:
122
+ noise_mode = 'random'
123
+ results = []
124
+ with torch.no_grad():
125
+ for i, (ipath, m) in enumerate(zip(dpath, mpath)):
126
+ if seed is None:
127
+ seed_all(i)
128
+
129
+ image = read_image(ipath)
130
+ image = (torch.from_numpy(image).float().to(
131
+ self.device) / 127.5 - 1).unsqueeze(0)
132
+
133
+ if m is not None:
134
+ mask = np.array(m).astype(np.float32) / 255.0
135
+ mask = torch.from_numpy(mask).float().to(
136
+ self.device).unsqueeze(0).unsqueeze(0)
137
+ else:
138
+ # adjust the masking ratio by using 'hole_range'
139
+ mask = RandomMask(resolution)  # NOTE: RandomMask is never imported here; this branch is unused because app.py always passes a mask
140
+ mask = torch.from_numpy(
141
+ mask).float().to(self.device).unsqueeze(0)
142
+
143
+ z = torch.from_numpy(np.random.randn(
144
+ 1, self.G.z_dim)).to(self.device)
145
+ output = self.G(image, mask, z, label,
146
+ truncation_psi=truncation_psi, noise_mode=noise_mode)
147
+ output = (output.permute(0, 2, 3, 1) * 127.5 +
148
+ 127.5).round().clamp(0, 255).to(torch.uint8)
149
+ output = output[0].cpu().numpy()
150
+ results.append(PIL.Image.fromarray(output, 'RGB'))
151
+
152
+ return results
153
+
154
+
155
+ if __name__ == "__main__":
156
+ pass  # generate_images() is not defined in this module; left as a no-op when run directly
157
+
158
+ # ----------------------------------------------------------------------------
159
+
160
+ # simple rest api for inference
161
+
162
+
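A minimal sketch of using the Inpainter added above directly, assuming the script runs from the repo root so `models/Places_512_FullData.pkl` and the `dnnlib`/`legacy`/`networks` imports resolve; `scene.png` and `scene_mask.png` are hypothetical 512x512 inputs, with the mask white where pixels are kept and black where new content is generated (matching how app.py builds its mask):

```python
from PIL import Image
from generate_image import Inpainter

inpainter = Inpainter(
    network_pkl='models/Places_512_FullData.pkl',
    resolution=512,
    truncation_psi=1.,
    noise_mode='const',
)

image = Image.open('scene.png').convert('RGB').resize((512, 512))
mask = Image.open('scene_mask.png').convert('L').resize((512, 512))

results = inpainter.generate_images2(dpath=[image], mpath=[mask], seed=42)
results[0].save('inpainted.png')
```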
heineken.gif DELETED

Git LFS Details

  • SHA256: 3226faefcd59ddf0e7508ba8f00035ff2f4d581c131af17e457d2af6deced6c6
  • Pointer size: 132 Bytes
  • Size of remote file: 9.75 MB
hild.gif DELETED

Git LFS Details

  • SHA256: a1480f45f8f1c95c6f1922f8873fad07c32573355c8fb2e0719c2ea1cd1f0fed
  • Pointer size: 133 Bytes
  • Size of remote file: 24.1 MB
models/Places_512_FullData+LAION300k+OPM1200k.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9ecebfd38f952abd3fde0a74caba64333627a80660f8c14699c1778232231e2
3
- size 661315824
models/Places_512_FullData+LAION300k+OPM300k.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d2ed6751e2ed8a2120864fd5c1f08a8e926a2f79d5aa91bb35f9cc32869e77f
3
- size 661315824
models/Places_512_FullData+LAION300k.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0230b8b39287e4a1ec4c53a7c724188cf0fe6dab2610bf79cdff3756b8517291
3
- size 661315824
msoffice.gif DELETED

Git LFS Details

  • SHA256: d2c4ac49c60cc2d5bb706eee00af01417503e5bc68dcf0d4dff842da3da672a0
  • Pointer size: 132 Bytes
  • Size of remote file: 6.57 MB
op.gif DELETED

Git LFS Details

  • SHA256: 2f046c9635d86f7856a4038925b1ecafcccd8113401da4f6883ef4d97a708430
  • Pointer size: 132 Bytes
  • Size of remote file: 6.57 MB
outpainting_example1.py DELETED
@@ -1,38 +0,0 @@
1
- # %%
2
- # an example script of how to do outpainting with the diffusers inpainting pipeline
3
- # this is basically just the example from
4
- # https://huggingface.co/runwayml/stable-diffusion-inpainting
5
- #%
6
- from diffusers import StableDiffusionInpaintPipeline
7
-
8
- from PIL import Image
9
- import numpy as np
10
- import torch
11
-
12
- from diffusers import StableDiffusionInpaintPipeline
13
-
14
- pipe = StableDiffusionInpaintPipeline.from_pretrained(
15
- "runwayml/stable-diffusion-inpainting",
16
- revision="fp16",
17
- torch_dtype=torch.float16,
18
- )
19
- pipe.to("cuda")
20
-
21
- # load the image, extract the mask
22
- rgba = Image.open('primed_image_with_alpha_channel.png')
23
- mask_image = Image.fromarray(np.array(rgba)[:, :, 3] == 0)
24
-
25
- # run the pipeline
26
- prompt = "Face of a yellow cat, high resolution, sitting on a park bench."
27
- # image and mask_image should be PIL images.
28
- # The mask structure is white for outpainting and black for keeping as is
29
- image = pipe(
30
- prompt=prompt,
31
- image=rgba,
32
- mask_image=mask_image,
33
- ).images[0]
34
- image
35
-
36
- # %%
37
- # the vae does lossy encoding, we could get better quality if we pasted the original image into our result.
38
- # this may yield visible edges
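The closing comment suggests pasting the original pixels back over the diffusion output to undo the VAE's lossy round trip. A minimal sketch of that idea, reusing `rgba` (the primer with alpha channel) and `image` (the pipeline output) from the deleted script, where alpha > 0 marks pixels that came from the original photo:

```python
# composite the untouched original pixels over the outpainted result;
# as the comment notes, this can leave visible seams at the boundary
result = image.convert('RGBA')
result.paste(rgba, (0, 0), mask=rgba.getchannel('A'))
result = result.convert('RGB')
```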
outpainting_example2.py DELETED
@@ -1,197 +0,0 @@
1
- # %%
2
- # an example script of how to do outpainting with diffusers img2img pipeline
3
- # should be compatible with any stable diffusion model
4
- # (only tested with runwayml/stable-diffusion-v1-5)
5
-
6
- from typing import Callable, List, Optional, Union
7
- from PIL import Image
8
- import PIL
9
- import numpy as np
10
- import torch
11
-
12
- from diffusers import StableDiffusionImg2ImgPipeline
13
- from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
14
- from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import preprocess
15
-
16
- pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
17
- "runwayml/stable-diffusion-v1-5",
18
- revision="fp16",
19
- torch_dtype=torch.float16,
20
- )
21
-
22
- pipe.set_use_memory_efficient_attention_xformers(True)
23
- pipe.to("cuda")
24
- # %%
25
- # load the image, extract the mask
26
- rgba = Image.open('primed_image_with_alpha_channel.png')
27
- mask_full = np.array(rgba)[:, :, 3] == 0
28
- rgb = rgba.convert('RGB')
29
- # %%
30
-
31
- # resize/convert the mask to the right size
32
- # for 512x512, the mask should be 1x4x64x64
33
- hw = np.array(mask_full.shape)
34
- h, w = (hw - hw % 32) // 8
35
- mask_image = Image.fromarray(mask_full).resize((w, h), Image.NEAREST)
36
- mask = (np.array(mask_image) == 0)[None, None]
37
- mask = np.concatenate([mask]*4, axis=1)
38
- mask = torch.from_numpy(mask).to('cuda')
39
- mask.shape
40
-
41
- # %%
42
-
43
-
44
- @torch.no_grad()
45
- def outpaint(
46
- self: StableDiffusionImg2ImgPipeline,
47
- prompt: Union[str, List[str]] = None,
48
- image: Union[torch.FloatTensor, PIL.Image.Image] = None,
49
- strength: float = 0.8,
50
- num_inference_steps: Optional[int] = 50,
51
- guidance_scale: Optional[float] = 7.5,
52
- negative_prompt: Optional[Union[str, List[str]]] = None,
53
- num_images_per_prompt: Optional[int] = 1,
54
- eta: Optional[float] = 0.0,
55
- generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
56
- prompt_embeds: Optional[torch.FloatTensor] = None,
57
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
58
- output_type: Optional[str] = "pil",
59
- return_dict: bool = True,
60
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
61
- callback_steps: Optional[int] = 1,
62
- **kwargs,
63
- ):
64
- r"""
65
- copy of the original img2img pipeline's __call__()
66
- https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
67
-
68
- Changes are marked with <EDIT> and </EDIT>
69
- """
70
- # message = "Please use `image` instead of `init_image`."
71
- # init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
72
- # image = init_image or image
73
-
74
- # 1. Check inputs. Raise error if not correct
75
- self.check_inputs(prompt, strength, callback_steps,
76
- negative_prompt, prompt_embeds, negative_prompt_embeds)
77
-
78
- # 2. Define call parameters
79
- if prompt is not None and isinstance(prompt, str):
80
- batch_size = 1
81
- elif prompt is not None and isinstance(prompt, list):
82
- batch_size = len(prompt)
83
- else:
84
- batch_size = prompt_embeds.shape[0]
85
- device = self._execution_device
86
- # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
87
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
88
- # corresponds to doing no classifier free guidance.
89
- do_classifier_free_guidance = guidance_scale > 1.0
90
-
91
- # 3. Encode input prompt
92
- prompt_embeds = self._encode_prompt(
93
- prompt,
94
- device,
95
- num_images_per_prompt,
96
- do_classifier_free_guidance,
97
- negative_prompt,
98
- prompt_embeds=prompt_embeds,
99
- negative_prompt_embeds=negative_prompt_embeds,
100
- )
101
-
102
- # 4. Preprocess image
103
- image = preprocess(image)
104
-
105
- # 5. set timesteps
106
- self.scheduler.set_timesteps(num_inference_steps, device=device)
107
- timesteps, num_inference_steps = self.get_timesteps(
108
- num_inference_steps, strength, device)
109
- latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
110
-
111
- # 6. Prepare latent variables
112
- latents = self.prepare_latents(
113
- image, latent_timestep, batch_size, num_images_per_prompt, prompt_embeds.dtype, device, generator
114
- )
115
-
116
- # <EDIT>
117
- # store the encoded version of the original image to overwrite
118
- # what the UNET generates "underneath" our image on each step
119
- encoded_original = (self.vae.config.scaling_factor *
120
- self.vae.encode(
121
- image.to(latents.device, latents.dtype)
122
- ).latent_dist.mean)
123
- # </EDIT>
124
-
125
- # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
126
- extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
127
-
128
- # 8. Denoising loop
129
- num_warmup_steps = len(timesteps) - \
130
- num_inference_steps * self.scheduler.order
131
- with self.progress_bar(total=num_inference_steps) as progress_bar:
132
- for i, t in enumerate(timesteps):
133
- # expand the latents if we are doing classifier free guidance
134
- latent_model_input = torch.cat(
135
- [latents] * 2) if do_classifier_free_guidance else latents
136
- latent_model_input = self.scheduler.scale_model_input(
137
- latent_model_input, t)
138
-
139
- # predict the noise residual
140
- noise_pred = self.unet(latent_model_input, t,
141
- encoder_hidden_states=prompt_embeds).sample
142
-
143
- # perform guidance
144
- if do_classifier_free_guidance:
145
- noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
146
- noise_pred = noise_pred_uncond + guidance_scale * \
147
- (noise_pred_text - noise_pred_uncond)
148
-
149
- # compute the previous noisy sample x_t -> x_t-1
150
- latents = self.scheduler.step(
151
- noise_pred, t, latents, **extra_step_kwargs).prev_sample
152
-
153
- # <EDIT> paste unmasked regions from the original image
154
- noise = torch.randn(
155
- encoded_original.shape, generator=generator, device=device)
156
- noised_encoded_original = self.scheduler.add_noise(
157
- encoded_original, noise, t).to(noise_pred.device, noise_pred.dtype)
158
- latents[mask] = noised_encoded_original[mask]
159
- # </EDIT>
160
-
161
- # call the callback, if provided
162
- if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
163
- progress_bar.update()
164
- if callback is not None and i % callback_steps == 0:
165
- callback(i, t, latents)
166
-
167
- # 9. Post-processing
168
- image = self.decode_latents(latents)
169
-
170
- # 10. Run safety checker
171
- image, has_nsfw_concept = self.run_safety_checker(
172
- image, device, prompt_embeds.dtype)
173
-
174
- # 11. Convert to PIL
175
- if output_type == "pil":
176
- image = self.numpy_to_pil(image)
177
-
178
- if not return_dict:
179
- return (image, has_nsfw_concept)
180
-
181
- return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
182
-
183
-
184
- # %%
185
- image = outpaint(
186
- pipe,
187
- image=rgb,
188
- prompt="forest in the style of Tim Hildebrandt",
189
- strength=0.5,
190
- num_inference_steps=50,
191
- guidance_scale=7.5,
192
- ).images[0]
193
- image
194
-
195
- # %%
196
- # the vae does lossy encoding, we could get better quality if we pasted the original image into our result.
197
- # this may yield visible edges
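As a small worked example of the mask-shape arithmetic in the deleted script above, assuming a hypothetical 520x768 primer instead of the square 512x512 case: height and width are snapped down to a multiple of 32 and divided by 8 to reach the VAE latent grid, and the boolean mask is then repeated over the 4 latent channels.

```python
import numpy as np

hw = np.array([520, 768])        # (height, width) of the primer image
h, w = (hw - hw % 32) // 8       # 520 -> 512 -> 64 rows; 768 -> 768 -> 96 cols
# the resized boolean mask has shape (1, 1, 64, 96); concatenating 4 copies
# along the channel axis gives (1, 4, 64, 96), which reduces to the
# 1x4x64x64 mentioned in the script for a 512x512 input
```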
process.gif DELETED

Git LFS Details

  • SHA256: b1ba0e59fcceb1f685e357eac1de305f98a008e37887015290eea5d23d251bc9
  • Pointer size: 133 Bytes
  • Size of remote file: 10.4 MB
walmart.gif DELETED

Git LFS Details

  • SHA256: a151840ccd81324304e8c3a25a519b9509873310d75a23368cb2223bfd689cbb
  • Pointer size: 133 Bytes
  • Size of remote file: 22.6 MB