.gitattributes CHANGED
@@ -1,5 +1,4 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.gif filter=lfs diff=lfs merge=lfs -text
3
  *.arrow filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
 
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
20221109.3a1e97df21bbdb63.gif DELETED

Git LFS Details

  • SHA256: e35bfc3aa454944634194b8c694c25003d9521128067e17e8e522ecb02d824b5
  • Pointer size: 133 Bytes
  • Size of remote file: 22.9 MB
app.py CHANGED
@@ -201,7 +201,7 @@ def pad(img, size=(128, 128), tosize=(512, 512), border=1):
201
  mask.paste(white, tc)
202
 
203
  if 'A' in rimg.getbands():
204
- mask.paste(rimg.getchannel('A'), tc)
205
  return new_img, mask
206
 
207
 
@@ -218,33 +218,12 @@ def img_to_b64(img):
218
  class Predictor:
219
  def __init__(self):
220
  """Load the model into memory to make running multiple predictions efficient"""
221
- self.models = {
222
- "places2": Inpainter(
223
- network_pkl='models/Places_512_FullData.pkl',
224
- resolution=512,
225
- truncation_psi=1.,
226
- noise_mode='const',
227
- ),
228
- "places2+laion300k": Inpainter(
229
- network_pkl='models/Places_512_FullData+LAION300k.pkl',
230
- resolution=512,
231
- truncation_psi=1.,
232
- noise_mode='const',
233
- ),
234
- "places2+laion300k+laion300k(opmasked)": Inpainter(
235
- network_pkl='models/Places_512_FullData+LAION300k+OPM300k.pkl',
236
- resolution=512,
237
- truncation_psi=1.,
238
- noise_mode='const',
239
- ),
240
- "places2+laion300k+laion1200k(opmasked)": Inpainter(
241
- network_pkl='models/Places_512_FullData+LAION300k+OPM1200k.pkl',
242
- resolution=512,
243
- truncation_psi=1.,
244
- noise_mode='const',
245
- ),
246
-
247
- }
248
 
249
  # The arguments and types the model takes as input
250
 
@@ -255,7 +234,6 @@ class Predictor:
255
  border=5,
256
  seed=42,
257
  size=0.5,
258
- model='places2',
259
  ) -> Image:
260
  i, m = pad(
261
  img,
@@ -264,7 +242,7 @@ class Predictor:
264
  border=border
265
  )
266
  """Run a single prediction on the model"""
267
- imgs = self.models[model].generate_images2(
268
  dpath=[i.resize((512, 512), resample=Image.Resampling.NEAREST)],
269
  mpath=[m.resize((512, 512), resample=Image.Resampling.NEAREST)],
270
  seed=seed,
@@ -281,148 +259,48 @@ class Predictor:
281
  1-(np.array(m) / 255)
282
  )
283
  minpainted = mask_to_alpha(inpainted, m)
284
- return inpainted, minpainted, ImageOps.invert(m)
285
 
286
- def predict_tiled(
287
- self,
288
- img: Image.Image,
289
- tosize=(512, 512),
290
- border=5,
291
- seed=42,
292
- size=0.5,
293
- model='places2',
294
- ) -> Image:
295
 
296
- i, morig = pad(
297
- img,
298
- size=size, # (328, 328),
299
- tosize=tosize,
300
- border=border
301
- )
302
- i.putalpha(morig)
303
- img = i
304
- # img.save('0.png')
305
- assert img.width == img.height
306
- assert img.width > 512 and img.width <= 512*2
307
-
308
- def tile_coords(image, n=2, tile_size=512):
309
- assert image.width == image.height
310
- offsets = np.linspace(0, image.width - tile_size, n).astype(int)
311
- for i in range(n):
312
- for j in range(n):
313
- left = offsets[j]
314
- upper = offsets[i]
315
- right = left + tile_size
316
- lower = upper + tile_size
317
- # tile = image.crop((left, upper, right, lower))
318
- yield [left, upper, right, lower]
319
-
320
- for ix, tc in enumerate(tile_coords(img, n=2)):
321
- i = img.crop(tc)
322
- # i.save(f't{ix}.png')
323
- m = i.getchannel('A')
324
-
325
- """Run a single prediction on the model"""
326
- imgs = self.models[model].generate_images2(
327
- dpath=[i.resize((512, 512), resample=Image.Resampling.NEAREST)],
328
- mpath=[m.resize((512, 512), resample=Image.Resampling.NEAREST)],
329
- seed=seed,
330
- )
331
- img_op_raw = imgs[0].convert('RGBA')
332
- # img_op_raw = img_op_raw.resize(tosize, resample=Image.Resampling.NEAREST)
333
- inpainted = img_op_raw.copy()
334
-
335
- # paste original image to remove inpainting/scaling artifacts
336
- inpainted = blend(
337
- i,
338
- inpainted,
339
- 1-(np.array(m) / 255)
340
- )
341
- # inpainted.save(f't{ix}_op.png')
342
- minpainted = mask_to_alpha(inpainted, m)
343
- # continue with partially inpainted image
344
- # since the tiles overlap, the next tile will contain (possibly inpainted) parts of the previous tile
345
- img.paste(inpainted, tc)
346
-
347
- # restore original alpha channel
348
- img.putalpha(morig)
349
- return img.convert('RGB'), img, ImageOps.invert(img.getchannel('A'))
350
  predictor = Predictor()
351
 
352
  # %%
353
 
354
 
355
- def _outpaint(img, tosize, border, seed, size, model, tiled):
356
- if tiled:
357
- img_op = predictor.predict_tiled(
358
- img,
359
- border=border,
360
- seed=seed,
361
- tosize=(tosize, tosize),
362
- size=float(size),
363
- model=model,
364
- )
365
- else:
366
- img_op = predictor.predict(
367
- img,
368
- border=border,
369
- seed=seed,
370
- tosize=(tosize, tosize),
371
- size=float(size),
372
- model=model,
373
- )
374
  return img_op
375
  # %%
376
 
377
 
378
- with gr.Blocks() as demo:
379
- maturl = 'https://github.com/fenglinglwb/MAT'
380
- gr.Markdown(f'''
381
- # MAT Primer for Stable Diffusion
382
- ## based on MAT: Mask-Aware Transformer for Large Hole Image Inpainting
383
- ### create a primer for use in stable diffusion outpainting
384
-
385
- i have added 2 example scripts to the repo:
386
- - outpainting_example1.py using the inpainting pipeline
387
- - outpainting_example2.py using the img2img pipeline. this is basically what i used for the examples below
388
- ''')
389
-
390
- gr.HTML(f'''<a href="{maturl}">{maturl}</a>''')
391
- with gr.Box():
392
- with gr.Row():
393
- gr.Markdown(f"""example with strength 0.5""")
394
- with gr.Row():
395
- gr.HTML("<img src='file/hild.gif'> ")
396
- gr.HTML("<img src='file/process.gif'>")
397
- gr.HTML("<img src='file/flagscapes.gif'>")
398
- btn = gr.Button("Run", variant="primary")
399
- with gr.Row():
400
- with gr.Column():
401
- searchimage = gc.Image(label="image", type='pil', image_mode='RGBA')
402
- to_size = gc.Slider(1, 1920, 512, step=1, label='output size')
403
- border = gc.Slider(1, 50, 0, step=1, label='border to crop from the image before outpainting')
404
- seed = gc.Slider(1, 65536, 10, step=1, label='seed')
405
- size = gc.Slider(0, 1, .5, step=0.01,label='scale of the image before outpainting')
406
- tiled = gc.Checkbox(label='tiled: run the network with 4 tiles of size 512x512 . only usable if output size >512 and <=1024', value=False)
407
-
408
- model = gc.Dropdown(
409
- choices=['places2',
410
- 'places2+laion300k',
411
- 'places2+laion300k+laion300k(opmasked)',
412
- 'places2+laion300k+laion1200k(opmasked)'],
413
- value='places2+laion300k+laion1200k(opmasked)',
414
- label='model',
415
- )
416
- with gr.Column():
417
- outwithoutalpha = gc.Image(label="primed image without alpha channel", type='pil', image_mode='RGBA')
418
- mask = gc.Image(label="outpainting mask", type='pil')
419
- out = gc.Image(label="primed image with alpha channel",type='pil', image_mode='RGBA')
420
 
421
- btn.click(
422
- fn=_outpaint,
423
- inputs=[searchimage, to_size, border, seed, size, model,tiled],
424
- outputs=[outwithoutalpha, out, mask])
425
 
426
 
427
- # %% launch
428
- demo.launch()
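For reference, the removed tiled path above splits the padded square image into four overlapping 512x512 crops before running the network on each one. A small sketch of the offset arithmetic, assuming a hypothetical 768x768 padded image (which satisfies the `512 < width <= 1024` constraint the removed code asserts):

```python
import numpy as np

width, tile_size, n = 768, 512, 2
offsets = np.linspace(0, width - tile_size, n).astype(int)   # -> [0, 256]
boxes = [(left, upper, left + tile_size, upper + tile_size)
         for upper in offsets for left in offsets]
# four crops: (0,0,512,512), (256,0,768,512), (0,256,512,768), (256,256,768,768)
# adjacent tiles overlap by 256 px, so each later tile re-sees already inpainted pixels
```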
 
201
  mask.paste(white, tc)
202
 
203
  if 'A' in rimg.getbands():
204
+ mask.paste(img.getchannel('A'), tc)
205
  return new_img, mask
206
 
207
 
 
218
  class Predictor:
219
  def __init__(self):
220
  """Load the model into memory to make running multiple predictions efficient"""
221
+ self.model = Inpainter(
222
+ network_pkl='models/Places_512_FullData.pkl',
223
+ resolution=512,
224
+ truncation_psi=1.,
225
+ noise_mode='const',
226
+ )
227
 
228
  # The arguments and types the model takes as input
229
 
 
234
  border=5,
235
  seed=42,
236
  size=0.5,
 
237
  ) -> Image:
238
  i, m = pad(
239
  img,
 
242
  border=border
243
  )
244
  """Run a single prediction on the model"""
245
+ imgs = self.model.generate_images2(
246
  dpath=[i.resize((512, 512), resample=Image.Resampling.NEAREST)],
247
  mpath=[m.resize((512, 512), resample=Image.Resampling.NEAREST)],
248
  seed=seed,
 
259
  1-(np.array(m) / 255)
260
  )
261
  minpainted = mask_to_alpha(inpainted, m)
262
+ return minpainted, inpainted, ImageOps.invert(m)
263
 
264
 
265
  predictor = Predictor()
266
 
267
  # %%
268
 
269
 
270
+ def _outpaint(img, tosize, border, seed, size):
271
+ img_op = predictor.predict(
272
+ img,
273
+ border=border,
274
+ seed=seed,
275
+ tosize=(tosize, tosize),
276
+ size=float(size)
277
+ )
278
  return img_op
279
  # %%
280
 
281
 
282
+ searchimage = gc.Image(shape=(224, 224), label="image", type='pil')
283
+ to_size = gc.Slider(1, 1920, 512, step=1, label='output size')
284
+ border = gc.Slider(
285
+ 1, 50, 0, step=1, label='border to crop from the image before outpainting')
286
+ seed = gc.Slider(1, 65536, 10, step=1, label='seed')
287
+ size = gc.Slider(0, 1, .5, step=0.01,
288
+ label='scale of the image before outpainting')
289
+
290
+ out = gc.Image(label="primed image with alpha channel", type='pil')
291
+ outwithoutalpha = gc.Image(
292
+ label="primed image without alpha channel", type='pil')
293
+ mask = gc.Image(label="outpainting mask", type='pil')
294
 
295
+ maturl = 'https://github.com/fenglinglwb/MAT'
296
+ gr.Interface(
297
+ _outpaint,
298
+ [searchimage, to_size, border, seed, size],
299
+ [out, outwithoutalpha, mask],
300
+ title=f"MAT Primer for Stable Diffusion\n\nbased on MAT: Mask-Aware Transformer for Large Hole Image Inpainting\n\n{maturl}",
301
+ description=f"create an outpainting primer for use in stable diffusion outpainting",
302
+ analytics_enabled=False,
303
+ allow_flagging='never',
304
 
305
 
306
+ ).launch()
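A minimal sketch of driving the simplified predictor without the Gradio UI, assuming the `# %%` cells above were executed interactively so `predictor` and `_outpaint` exist, and using a hypothetical `photo.png` as input; the argument values mirror the slider defaults:

```python
from PIL import Image

img = Image.open('photo.png').convert('RGBA')

# same call the interface makes; returns (primed image with alpha channel,
# primed image without alpha channel, outpainting mask)
with_alpha, without_alpha, mask = _outpaint(
    img,
    tosize=512,   # output size
    border=0,     # border to crop from the image before outpainting
    seed=10,
    size=0.5,     # scale of the image before outpainting
)
with_alpha.save('primed_image_with_alpha_channel.png')
```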
 
bread.gif DELETED

Git LFS Details

  • SHA256: 107e8adb6adb52d59cdc9c66e8306c05d4deb17f1fc24c4bc4196d4337b18d92
  • Pointer size: 133 Bytes
  • Size of remote file: 22.8 MB
flagscapes.gif DELETED

Git LFS Details

  • SHA256: 51ab26dfe1543c2418254bdab15ffb5081b2fa39a80031fb2511e7bba122b055
  • Pointer size: 133 Bytes
  • Size of remote file: 24.5 MB
generate_image.py ADDED
@@ -0,0 +1,162 @@
1
+ # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ """Generate images using pretrained network pickle."""
10
+ from PIL import Image
11
+ from cog import BasePredictor, Input, Path
12
+ from networks.mat import Generator
13
+ import legacy
14
+ import torch.nn.functional as F
15
+ import torch
16
+ import PIL.Image
17
+ import numpy as np
18
+ import dnnlib
19
+ import click
20
+ from typing import List, Optional
21
+ import random
22
+ import re
23
+ import os
24
+ import glob
25
+ import cv2
26
+ pyspng = None
27
+
28
+
29
+ def num_range(s: str) -> List[int]:
30
+ '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.'''
31
+
32
+ range_re = re.compile(r'^(\d+)-(\d+)$')
33
+ m = range_re.match(s)
34
+ if m:
35
+ return list(range(int(m.group(1)), int(m.group(2))+1))
36
+ vals = s.split(',')
37
+ return [int(x) for x in vals]
38
+
39
+
40
+ def copy_params_and_buffers(src_module, dst_module, require_all=False):
41
+ assert isinstance(src_module, torch.nn.Module)
42
+ assert isinstance(dst_module, torch.nn.Module)
43
+ src_tensors = {name: tensor for name,
44
+ tensor in named_params_and_buffers(src_module)}
45
+ for name, tensor in named_params_and_buffers(dst_module):
46
+ assert (name in src_tensors) or (not require_all)
47
+ if name in src_tensors:
48
+ tensor.copy_(src_tensors[name].detach()).requires_grad_(
49
+ tensor.requires_grad)
50
+
51
+
52
+ def params_and_buffers(module):
53
+ assert isinstance(module, torch.nn.Module)
54
+ return list(module.parameters()) + list(module.buffers())
55
+
56
+
57
+ def named_params_and_buffers(module):
58
+ assert isinstance(module, torch.nn.Module)
59
+ return list(module.named_parameters()) + list(module.named_buffers())
60
+
61
+
62
+
63
+ class Inpainter:
64
+ def __init__(self,
65
+ network_pkl,
66
+ resolution=512,
67
+ truncation_psi=1,
68
+ noise_mode='const',
69
+ sdevice='cpu'
70
+ ):
71
+ self.resolution = resolution
72
+ self.truncation_psi = truncation_psi
73
+ self.noise_mode = noise_mode
74
+ print(f'Loading networks from: {network_pkl}')
75
+ self.device = torch.device(sdevice)
76
+ with dnnlib.util.open_url(network_pkl) as f:
77
+ G_saved = legacy.load_network_pkl(f)['G_ema'].to(
78
+ self.device).eval().requires_grad_(False)  # type: ignore
79
+ net_res = 512 if resolution > 512 else resolution
80
+ self.G = Generator(
81
+ z_dim=512,
82
+ c_dim=0,
83
+ w_dim=512,
84
+ img_resolution=net_res,
85
+ img_channels=3
86
+ ).to(self.device).eval().requires_grad_(False)
87
+ copy_params_and_buffers(G_saved, self.G, require_all=True)
88
+
89
+ def generate_images2(
90
+ self,
91
+ dpath: List[PIL.Image.Image],
92
+ mpath: List[Optional[PIL.Image.Image]],
93
+ seed: int = 42,
94
+ ):
95
+ """
96
+ Generate images using pretrained network pickle.
97
+ """
98
+ resolution = self.resolution
99
+ truncation_psi = self.truncation_psi
100
+ noise_mode = self.noise_mode
101
+ # seed = 240 # pick up a random number
102
+ def seed_all(seed):
103
+ random.seed(seed)
104
+ np.random.seed(seed)
105
+ torch.manual_seed(seed)
106
+ torch.cuda.manual_seed(seed)
107
+ if seed is not None:
108
+ seed_all(seed)
109
+
110
+ # no Labels.
111
+ label = torch.zeros([1, self.G.c_dim], device=self.device)
112
+
113
+ def read_image(image):
114
+ image = np.array(image)
115
+ if image.ndim == 2:
116
+ image = image[:, :, np.newaxis] # HW => HWC
117
+ image = np.repeat(image, 3, axis=2)
118
+ image = image.transpose(2, 0, 1) # HWC => CHW
119
+ image = image[:3]
120
+ return image
121
+ if resolution != 512:
122
+ noise_mode = 'random'
123
+ results = []
124
+ with torch.no_grad():
125
+ for i, (ipath, m) in enumerate(zip(dpath, mpath)):
126
+ if seed is None:
127
+ seed_all(i)
128
+
129
+ image = read_image(ipath)
130
+ image = (torch.from_numpy(image).float().to(
131
+ self.device) / 127.5 - 1).unsqueeze(0)
132
+
133
+ if m is not None:
134
+ mask = np.array(m).astype(np.float32) / 255.0
135
+ mask = torch.from_numpy(mask).float().to(
136
+ self.device).unsqueeze(0).unsqueeze(0)
137
+ else:
138
+ # adjust the masking ratio by using 'hole_range'
139
+ mask = RandomMask(resolution)  # NOTE: RandomMask is never imported here; this branch is unused because app.py always passes a mask
140
+ mask = torch.from_numpy(
141
+ mask).float().to(self.device).unsqueeze(0)
142
+
143
+ z = torch.from_numpy(np.random.randn(
144
+ 1, self.G.z_dim)).to(self.device)
145
+ output = self.G(image, mask, z, label,
146
+ truncation_psi=truncation_psi, noise_mode=noise_mode)
147
+ output = (output.permute(0, 2, 3, 1) * 127.5 +
148
+ 127.5).round().clamp(0, 255).to(torch.uint8)
149
+ output = output[0].cpu().numpy()
150
+ results.append(PIL.Image.fromarray(output, 'RGB'))
151
+
152
+ return results
153
+
154
+
155
+ if __name__ == "__main__":
156
+ pass  # generate_images() is not defined in this module; left as a no-op when run directly
157
+
158
+ # ----------------------------------------------------------------------------
159
+
160
+ # simple rest api for inference
161
+
162
+
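A minimal sketch of using the Inpainter added above directly, assuming the script runs from the repo root so `models/Places_512_FullData.pkl` and the `dnnlib`/`legacy`/`networks` imports resolve; `scene.png` and `scene_mask.png` are hypothetical 512x512 inputs, with the mask white where pixels are kept and black where new content is generated (matching how app.py builds its mask):

```python
from PIL import Image
from generate_image import Inpainter

inpainter = Inpainter(
    network_pkl='models/Places_512_FullData.pkl',
    resolution=512,
    truncation_psi=1.,
    noise_mode='const',
)

image = Image.open('scene.png').convert('RGB').resize((512, 512))
mask = Image.open('scene_mask.png').convert('L').resize((512, 512))

results = inpainter.generate_images2(dpath=[image], mpath=[mask], seed=42)
results[0].save('inpainted.png')
```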
heineken.gif DELETED

Git LFS Details

  • SHA256: 3226faefcd59ddf0e7508ba8f00035ff2f4d581c131af17e457d2af6deced6c6
  • Pointer size: 132 Bytes
  • Size of remote file: 9.75 MB
hild.gif DELETED

Git LFS Details

  • SHA256: a1480f45f8f1c95c6f1922f8873fad07c32573355c8fb2e0719c2ea1cd1f0fed
  • Pointer size: 133 Bytes
  • Size of remote file: 24.1 MB
models/Places_512_FullData+LAION300k+OPM1200k.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9ecebfd38f952abd3fde0a74caba64333627a80660f8c14699c1778232231e2
3
- size 661315824
models/Places_512_FullData+LAION300k+OPM300k.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d2ed6751e2ed8a2120864fd5c1f08a8e926a2f79d5aa91bb35f9cc32869e77f
3
- size 661315824
models/Places_512_FullData+LAION300k.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0230b8b39287e4a1ec4c53a7c724188cf0fe6dab2610bf79cdff3756b8517291
3
- size 661315824
msoffice.gif DELETED

Git LFS Details

  • SHA256: d2c4ac49c60cc2d5bb706eee00af01417503e5bc68dcf0d4dff842da3da672a0
  • Pointer size: 132 Bytes
  • Size of remote file: 6.57 MB
op.gif DELETED

Git LFS Details

  • SHA256: 2f046c9635d86f7856a4038925b1ecafcccd8113401da4f6883ef4d97a708430
  • Pointer size: 132 Bytes
  • Size of remote file: 6.57 MB
outpainting_example1.py DELETED
@@ -1,38 +0,0 @@
1
- # %%
2
- # an example script of how to do outpainting with the diffusers inpainting pipeline
3
- # this is basically just the example from
4
- # https://huggingface.co/runwayml/stable-diffusion-inpainting
5
- #%
6
- from diffusers import StableDiffusionInpaintPipeline
7
-
8
- from PIL import Image
9
- import numpy as np
10
- import torch
11
-
12
- from diffusers import StableDiffusionInpaintPipeline
13
-
14
- pipe = StableDiffusionInpaintPipeline.from_pretrained(
15
- "runwayml/stable-diffusion-inpainting",
16
- revision="fp16",
17
- torch_dtype=torch.float16,
18
- )
19
- pipe.to("cuda")
20
-
21
- # load the image, extract the mask
22
- rgba = Image.open('primed_image_with_alpha_channel.png')
23
- mask_image = Image.fromarray(np.array(rgba)[:, :, 3] == 0)
24
-
25
- # run the pipeline
26
- prompt = "Face of a yellow cat, high resolution, sitting on a park bench."
27
- # image and mask_image should be PIL images.
28
- # The mask structure is white for outpainting and black for keeping as is
29
- image = pipe(
30
- prompt=prompt,
31
- image=rgba,
32
- mask_image=mask_image,
33
- ).images[0]
34
- image
35
-
36
- # %%
37
- # the vae does lossy encoding, we could get better quality if we pasted the original image into our result.
38
- # this may yield visible edges
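The closing comment suggests pasting the original pixels back over the diffusion output to undo the VAE's lossy round trip. A minimal sketch of that idea, reusing `rgba` (the primer with alpha channel) and `image` (the pipeline output) from the deleted script, where alpha > 0 marks pixels that came from the original photo:

```python
# composite the untouched original pixels over the outpainted result;
# as the comment notes, this can leave visible seams at the boundary
result = image.convert('RGBA')
result.paste(rgba, (0, 0), mask=rgba.getchannel('A'))
result = result.convert('RGB')
```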
outpainting_example2.py DELETED
@@ -1,197 +0,0 @@
1
- # %%
2
- # an example script of how to do outpainting with diffusers img2img pipeline
3
- # should be compatible with any stable diffusion model
4
- # (only tested with runwayml/stable-diffusion-v1-5)
5
-
6
- from typing import Callable, List, Optional, Union
7
- from PIL import Image
8
- import PIL
9
- import numpy as np
10
- import torch
11
-
12
- from diffusers import StableDiffusionImg2ImgPipeline
13
- from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
14
- from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import preprocess
15
-
16
- pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
17
- "runwayml/stable-diffusion-v1-5",
18
- revision="fp16",
19
- torch_dtype=torch.float16,
20
- )
21
-
22
- pipe.set_use_memory_efficient_attention_xformers(True)
23
- pipe.to("cuda")
24
- # %%
25
- # load the image, extract the mask
26
- rgba = Image.open('primed_image_with_alpha_channel.png')
27
- mask_full = np.array(rgba)[:, :, 3] == 0
28
- rgb = rgba.convert('RGB')
29
- # %%
30
-
31
- # resize/convert the mask to the right size
32
- # for 512x512, the mask should be 1x4x64x64
33
- hw = np.array(mask_full.shape)
34
- h, w = (hw - hw % 32) // 8
35
- mask_image = Image.fromarray(mask_full).resize((w, h), Image.NEAREST)
36
- mask = (np.array(mask_image) == 0)[None, None]
37
- mask = np.concatenate([mask]*4, axis=1)
38
- mask = torch.from_numpy(mask).to('cuda')
39
- mask.shape
40
-
41
- # %%
42
-
43
-
44
- @torch.no_grad()
45
- def outpaint(
46
- self: StableDiffusionImg2ImgPipeline,
47
- prompt: Union[str, List[str]] = None,
48
- image: Union[torch.FloatTensor, PIL.Image.Image] = None,
49
- strength: float = 0.8,
50
- num_inference_steps: Optional[int] = 50,
51
- guidance_scale: Optional[float] = 7.5,
52
- negative_prompt: Optional[Union[str, List[str]]] = None,
53
- num_images_per_prompt: Optional[int] = 1,
54
- eta: Optional[float] = 0.0,
55
- generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
56
- prompt_embeds: Optional[torch.FloatTensor] = None,
57
- negative_prompt_embeds: Optional[torch.FloatTensor] = None,
58
- output_type: Optional[str] = "pil",
59
- return_dict: bool = True,
60
- callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
61
- callback_steps: Optional[int] = 1,
62
- **kwargs,
63
- ):
64
- r"""
65
- copy of the original img2img pipeline's __call__()
66
- https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
67
-
68
- Changes are marked with <EDIT> and </EDIT>
69
- """
70
- # message = "Please use `image` instead of `init_image`."
71
- # init_image = deprecate("init_image", "0.14.0", message, take_from=kwargs)
72
- # image = init_image or image
73
-
74
- # 1. Check inputs. Raise error if not correct
75
- self.check_inputs(prompt, strength, callback_steps,
76
- negative_prompt, prompt_embeds, negative_prompt_embeds)
77
-
78
- # 2. Define call parameters
79
- if prompt is not None and isinstance(prompt, str):
80
- batch_size = 1
81
- elif prompt is not None and isinstance(prompt, list):
82
- batch_size = len(prompt)
83
- else:
84
- batch_size = prompt_embeds.shape[0]
85
- device = self._execution_device
86
- # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
87
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
88
- # corresponds to doing no classifier free guidance.
89
- do_classifier_free_guidance = guidance_scale > 1.0
90
-
91
- # 3. Encode input prompt
92
- prompt_embeds = self._encode_prompt(
93
- prompt,
94
- device,
95
- num_images_per_prompt,
96
- do_classifier_free_guidance,
97
- negative_prompt,
98
- prompt_embeds=prompt_embeds,
99
- negative_prompt_embeds=negative_prompt_embeds,
100
- )
101
-
102
- # 4. Preprocess image
103
- image = preprocess(image)
104
-
105
- # 5. set timesteps
106
- self.scheduler.set_timesteps(num_inference_steps, device=device)
107
- timesteps, num_inference_steps = self.get_timesteps(
108
- num_inference_steps, strength, device)
109
- latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt)
110
-
111
- # 6. Prepare latent variables
112
- latents = self.prepare_latents(
113
- image, latent_timestep, batch_size, num_images_per_prompt, prompt_embeds.dtype, device, generator
114
- )
115
-
116
- # <EDIT>
117
- # store the encoded version of the original image to overwrite
118
- # what the UNET generates "underneath" our image on each step
119
- encoded_original = (self.vae.config.scaling_factor *
120
- self.vae.encode(
121
- image.to(latents.device, latents.dtype)
122
- ).latent_dist.mean)
123
- # </EDIT>
124
-
125
- # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
126
- extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
127
-
128
- # 8. Denoising loop
129
- num_warmup_steps = len(timesteps) - \
130
- num_inference_steps * self.scheduler.order
131
- with self.progress_bar(total=num_inference_steps) as progress_bar:
132
- for i, t in enumerate(timesteps):
133
- # expand the latents if we are doing classifier free guidance
134
- latent_model_input = torch.cat(
135
- [latents] * 2) if do_classifier_free_guidance else latents
136
- latent_model_input = self.scheduler.scale_model_input(
137
- latent_model_input, t)
138
-
139
- # predict the noise residual
140
- noise_pred = self.unet(latent_model_input, t,
141
- encoder_hidden_states=prompt_embeds).sample
142
-
143
- # perform guidance
144
- if do_classifier_free_guidance:
145
- noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
146
- noise_pred = noise_pred_uncond + guidance_scale * \
147
- (noise_pred_text - noise_pred_uncond)
148
-
149
- # compute the previous noisy sample x_t -> x_t-1
150
- latents = self.scheduler.step(
151
- noise_pred, t, latents, **extra_step_kwargs).prev_sample
152
-
153
- # <EDIT> paste unmasked regions from the original image
154
- noise = torch.randn(
155
- encoded_original.shape, generator=generator, device=device)
156
- noised_encoded_original = self.scheduler.add_noise(
157
- encoded_original, noise, t).to(noise_pred.device, noise_pred.dtype)
158
- latents[mask] = noised_encoded_original[mask]
159
- # </EDIT>
160
-
161
- # call the callback, if provided
162
- if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
163
- progress_bar.update()
164
- if callback is not None and i % callback_steps == 0:
165
- callback(i, t, latents)
166
-
167
- # 9. Post-processing
168
- image = self.decode_latents(latents)
169
-
170
- # 10. Run safety checker
171
- image, has_nsfw_concept = self.run_safety_checker(
172
- image, device, prompt_embeds.dtype)
173
-
174
- # 11. Convert to PIL
175
- if output_type == "pil":
176
- image = self.numpy_to_pil(image)
177
-
178
- if not return_dict:
179
- return (image, has_nsfw_concept)
180
-
181
- return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
182
-
183
-
184
- # %%
185
- image = outpaint(
186
- pipe,
187
- image=rgb,
188
- prompt="forest in the style of Tim Hildebrandt",
189
- strength=0.5,
190
- num_inference_steps=50,
191
- guidance_scale=7.5,
192
- ).images[0]
193
- image
194
-
195
- # %%
196
- # the vae does lossy encoding, we could get better quality if we pasted the original image into our result.
197
- # this may yield visible edges
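As a small worked example of the mask-shape arithmetic in the deleted script above, assuming a hypothetical 520x768 primer instead of the square 512x512 case: height and width are snapped down to a multiple of 32 and divided by 8 to reach the VAE latent grid, and the boolean mask is then repeated over the 4 latent channels.

```python
import numpy as np

hw = np.array([520, 768])        # (height, width) of the primer image
h, w = (hw - hw % 32) // 8       # 520 -> 512 -> 64 rows; 768 -> 768 -> 96 cols
# the resized boolean mask has shape (1, 1, 64, 96); concatenating 4 copies
# along the channel axis gives (1, 4, 64, 96), which reduces to the
# 1x4x64x64 mentioned in the script for a 512x512 input
```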
process.gif DELETED

Git LFS Details

  • SHA256: b1ba0e59fcceb1f685e357eac1de305f98a008e37887015290eea5d23d251bc9
  • Pointer size: 133 Bytes
  • Size of remote file: 10.4 MB
walmart.gif DELETED

Git LFS Details

  • SHA256: a151840ccd81324304e8c3a25a519b9509873310d75a23368cb2223bfd689cbb
  • Pointer size: 133 Bytes
  • Size of remote file: 22.6 MB