import cv2
import numpy as np
import torch
import gradio as gr
import random
import spaces
from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline
from diffusers.utils import load_image

DESCRIPTION = '''
This uses code lifted almost verbatim from
[Outpainting II - Differential Diffusion](https://huggingface.co/blog/OzzyGT/outpainting-differential-diffusion). This only works well on blurry edges.
'''

ARTICLE = '''
The [example image](https://commons.wikimedia.org/wiki/File:Coucang.jpg) is by Aprisonsan
and licensed under CC-BY-SA 4.0 International.
'''

xlp_kwargs = {
    'custom_pipeline': 'pipeline_stable_diffusion_xl_differential_img2img'
}

if torch.cuda.is_available():
    device = 'cuda'
    device_dtype = torch.float16
    xlp_kwargs['variant'] = 'fp16'
else:
    device = 'cpu'
    device_dtype = torch.float32
    DESCRIPTION += '''
This Space appears to be running on a CPU; it will take hours to get results. You may [duplicate this space](https://huggingface.co/spaces/clinteroni/outpainting-demo?duplicate=true) and pay for an upgraded runtime instead.
'''

xlp_kwargs['torch_dtype'] = device_dtype

def merge_images(original, new_image, offset, direction):
    """Paste the newly generated tile onto the running canvas, which grows by
    `offset` pixels in the given direction."""
    if direction in ["left", "right"]:
        merged_image = np.zeros(
            (original.shape[0], original.shape[1] + offset, 3), dtype=np.uint8)
    elif direction in ["top", "bottom"]:
        merged_image = np.zeros(
            (original.shape[0] + offset, original.shape[1], 3), dtype=np.uint8)

    if direction == "left":
        merged_image[:, offset:] = original
        merged_image[:, : new_image.shape[1]] = new_image
    elif direction == "right":
        merged_image[:, : original.shape[1]] = original
        merged_image[:, original.shape[1] + offset -
                     new_image.shape[1]: original.shape[1] + offset] = new_image
    elif direction == "top":
        merged_image[offset:, :] = original
        merged_image[: new_image.shape[0], :] = new_image
    elif direction == "bottom":
        merged_image[: original.shape[0], :] = original
        merged_image[original.shape[0] + offset -
                     new_image.shape[0]: original.shape[0] + offset, :] = new_image

    return merged_image

def slice_image(image):
    """Cut the image into six square tiles (a 2 x 3 grid) to feed the IP-Adapter."""
    height, width, _ = image.shape
    slice_size = min(width // 2, height // 3)

    slices = []
    for h in range(3):
        for w in range(2):
            left = w * slice_size
            upper = h * slice_size
            right = left + slice_size
            lower = upper + slice_size

            # Clamp the last column/row so the tile stays inside the image
            if w == 1 and right > width:
                left -= right - width
                right = width
            if h == 2 and lower > height:
                upper -= lower - height
                lower = height

            tile = image[upper:lower, left:right]
            slices.append(tile)

    return slices

def process_image(
    image,
    fill_color=(0, 0, 0),
    mask_offset=50,
    blur_radius=500,
    expand_pixels=256,
    direction="left",
    inpaint_mask_color=50,
    max_size=1024,
):
    """Expand the canvas in one direction, build the differential-diffusion map,
    pre-fill the new area with Telea inpainting, and return (image, mask) tensors."""
    height, width = image.shape[:2]

    new_height = height + \
        (expand_pixels if direction in ["top", "bottom"] else 0)
    new_width = width + \
        (expand_pixels if direction in ["left", "right"] else 0)

    # If the expanded canvas would exceed max_size, crop the image from the opposite side
    if new_height > max_size:
        if direction == "top":
            image = image[:max_size, :]
        elif direction == "bottom":
            image = image[new_height - max_size:, :]
        new_height = max_size

    if new_width > max_size:
        if direction == "left":
            image = image[:, :max_size]
        elif direction == "right":
            image = image[:, new_width - max_size:]
        new_width = max_size

    height, width = image.shape[:2]

    new_image = np.full((new_height, new_width, 3), fill_color, dtype=np.uint8)
    mask = np.full_like(new_image, 255, dtype=np.uint8)
    inpaint_mask = np.full_like(new_image, 0, dtype=np.uint8)

    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    inpaint_mask = cv2.cvtColor(inpaint_mask, cv2.COLOR_BGR2GRAY)

    if direction == "left":
        new_image[:, expand_pixels:] = image[:, : max_size - expand_pixels]
        mask[:, : expand_pixels + mask_offset] = inpaint_mask_color
        inpaint_mask[:, :expand_pixels] = 255
    elif direction == "right":
        new_image[:, :width] = image
        mask[:, width - mask_offset:] = inpaint_mask_color
        inpaint_mask[:, width:] = 255
    elif direction == "top":
        new_image[expand_pixels:, :] = image[: max_size - expand_pixels, :]
        mask[: expand_pixels + mask_offset, :] = inpaint_mask_color
        inpaint_mask[:expand_pixels, :] = 255
    elif direction == "bottom":
        new_image[:height, :] = image
        mask[height - mask_offset:, :] = inpaint_mask_color
        inpaint_mask[height:, :] = 255

    # Blur the map (the Gaussian kernel size must be odd)
    if blur_radius % 2 == 0:
        blur_radius += 1
    mask = cv2.GaussianBlur(mask, (blur_radius, blur_radius), 0)

    # Telea inpaint to pre-fill the expanded area
    _, mask_np = cv2.threshold(
        inpaint_mask, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    inpaint = cv2.inpaint(new_image, mask_np, 3, cv2.INPAINT_TELEA)

    # Convert the image to a tensor in [-1, 1]
    inpaint = cv2.cvtColor(inpaint, cv2.COLOR_BGR2RGB)
    inpaint = torch.from_numpy(inpaint).permute(2, 0, 1).float()
    inpaint = inpaint / 127.5 - 1
    inpaint = inpaint.unsqueeze(0).to(device)

    # Convert the mask to a tensor in [0, 1]
    mask = torch.from_numpy(mask)
    mask = mask.unsqueeze(0).float() / 255.0
    mask = mask.to(device)

    return inpaint, mask

def image_resize(image, new_size=1024):
    """Resize so the longer side is `new_size` pixels, preserving the aspect ratio."""
    height, width = image.shape[:2]
    aspect_ratio = width / height

    new_width = new_size
    new_height = new_size
    if aspect_ratio != 1:
        if width > height:
            new_height = int(new_size / aspect_ratio)
        else:
            new_width = int(new_size * aspect_ratio)

    image = cv2.resize(image, (new_width, new_height),
                       interpolation=cv2.INTER_LANCZOS4)

    return image

@spaces.GPU  # request a GPU slot when this Space runs on ZeroGPU hardware
def outpaint(pil_image, direction='right', times_to_expand=4, guidance_scale=4.0, blur_radius=500):
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    pipeline = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        **xlp_kwargs
    ).to(device)
    pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
        pipeline.scheduler.config, use_karras_sigmas=True)

    pipeline.load_ip_adapter(
        "h94/IP-Adapter",
        subfolder="sdxl_models",
        weight_name=[
            "ip-adapter-plus_sdxl_vit-h.safetensors",
        ],
        image_encoder_folder="models/image_encoder",
    )
    pipeline.set_ip_adapter_scale(0.1)

    def generate_image(prompt, negative_prompt, image, mask, ip_adapter_image, seed: int = None):
        if seed is None:
            seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device="cpu").manual_seed(seed)
        image = pipeline(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=1024,
            height=1024,
            guidance_scale=guidance_scale,
            num_inference_steps=25,
            original_image=image,
            image=image,
            strength=1.0,
            map=mask,
            generator=generator,
            ip_adapter_image=[ip_adapter_image],
            output_type="np",
        ).images[0]
        image = (image * 255).astype(np.uint8)
        # The pipeline returns RGB; convert to BGR to match the cv2-based helpers
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        return image

    prompt = ""
    negative_prompt = ""
    inpaint_mask_color = 50  # lighter values keep more of the Telea inpainting
    # Don't expand by more than half of the picture at once, so the model keeps enough context
    expand_pixels = 256

    original = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    image = image_resize(original)

    # Pad the resized image out to a 1024x1024 square first:
    # image.shape[1] for horizontal expansion, image.shape[0] for vertical
    if direction in ["left", "right"]:
        expand_pixels_to_square = 1024 - image.shape[1]
    else:
        expand_pixels_to_square = 1024 - image.shape[0]
    image, mask = process_image(
        image, expand_pixels=expand_pixels_to_square, direction=direction,
        inpaint_mask_color=inpaint_mask_color, blur_radius=blur_radius
    )

    # Square crops of the source image guide the IP-Adapter
    ip_adapter_image = slice_image(original)

    generated = generate_image(
        prompt, negative_prompt, image, mask, ip_adapter_image)
    final_image = generated

    for i in range(times_to_expand):
        image, mask = process_image(
            final_image, direction=direction, expand_pixels=expand_pixels,
            inpaint_mask_color=inpaint_mask_color, blur_radius=blur_radius
        )
        ip_adapter_image = slice_image(generated)
        generated = generate_image(
            prompt, negative_prompt, image, mask, ip_adapter_image)
        final_image = merge_images(final_image, generated, 256, direction)

    # Back to RGB for Gradio
    color_converted = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
    return color_converted

gradio_app = gr.Interface(
    outpaint,
    inputs=[
        gr.Image(label="Select start image", sources=[
                 'upload', 'clipboard'], type='pil'),
        gr.Radio(["left", "right", "top", "bottom"], label="Direction",
                 info="Outward from which edge to paint?", value='right'),
        gr.Slider(2, 4, step=1, value=4, label="Times to expand",
                  info="Choose between 2 and 4"),
        gr.Slider(1, 12, step=0.1, value=4, label="Guidance scale",
                  info="Choose between 1 and 12"),
        gr.Slider(250, 500, step=1, value=500, label="Mask blur radius",
                  info="Choose between 250 and 500"),
    ],
    outputs=[gr.Image(label="Processed Image")],
    examples=[
        [load_image('examples/Coucang.jpg'), 'right', 4, 5, 500]
    ],
    title="Outpainting with differential diffusion demo",
    description=DESCRIPTION,
    article=ARTICLE,
)

if __name__ == "__main__":
    gradio_app.queue(max_size=20).launch()
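
# Example (sketch): calling outpaint() directly instead of going through the
# Gradio UI, e.g. for quick local testing. Assumes the bundled example image
# exists; any PIL image works. outpaint() returns an RGB uint8 numpy array.
#
#   from PIL import Image
#   result = outpaint(load_image('examples/Coucang.jpg'),
#                     direction='right', times_to_expand=2)
#   Image.fromarray(result).save('outpainted.png')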