import gradio as gr
import jax
import numpy as np
import jax.numpy as jnp
from flax.jax_utils import replicate
from flax.training.common_utils import shard
from PIL import Image
from diffusers import FlaxStableDiffusionControlNetPipeline, FlaxControlNetModel
import cv2


def create_key(seed: int = 0):
    """Return a JAX PRNG key built from ``seed``."""
    return jax.random.PRNGKey(seed)


def crop_and_resize(pilimg: Image.Image, size: int = 512) -> Image.Image:
    """Center-crop ``pilimg`` to a square and resize it to ``size`` x ``size``.

    The square side is the shorter image dimension; the longer dimension is
    trimmed equally on both sides. Resizing uses Lanczos resampling and will
    downsample or upsample as necessary.
    """
    width, height = pilimg.size
    minsize = min(width, height)
    # Only the longer axis gets a non-zero offset, which centers the crop.
    x0 = (width - height) // 2 if width > height else 0
    y0 = (height - width) // 2 if height > width else 0
    pilimg = pilimg.crop((x0, y0, x0 + minsize, y0 + minsize))
    return pilimg.resize((size, size), resample=Image.LANCZOS)


def canny_filter(image: np.ndarray) -> np.ndarray:
    """Return the Canny edge map of a 3-channel RGB image.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur,
    and passed to ``cv2.Canny`` with thresholds 50 and 150.

    The input is treated as RGB because the caller in this file passes an
    array that originated from Gradio/PIL rather than from ``cv2.imread``
    (which would be BGR).
    """
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    blurred_image = cv2.GaussianBlur(gray_image, (5, 5), 0)
    return cv2.Canny(blurred_image, 50, 150)


# load control net and stable diffusion v1-5
controlnet, controlnet_params = FlaxControlNetModel.from_pretrained(
    "jax-diffusers-event/canny-coyo1m", dtype=jnp.bfloat16
)
pipe, params = FlaxStableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    revision="flax",
    dtype=jnp.bfloat16,
)

# The weights never change between requests, so attach the ControlNet
# parameters and replicate everything across devices once at start-up
# instead of on every call to ``infer``.
params["controlnet"] = controlnet_params
p_params = replicate(params)


def infer(prompts, negative_prompts, image):
    """Generate images from a prompt, a negative prompt and a guide image.

    Args:
        prompts: Text prompt, as delivered by the Gradio ``"text"`` input.
        negative_prompts: Negative text prompt.
        image: Input picture as a numpy array (Gradio ``"image"`` input).

    Returns:
        A list of PIL images, one per local JAX device.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        raise gr.Error("Please provide an input image.")

    # image is a numpy array, we'll convert to PIL to resize and back to numpy.
    # Forcing RGB guarantees the 3-channel layout ``canny_filter`` expects.
    pil_image = crop_and_resize(Image.fromarray(image).convert("RGB"))
    canny_image = Image.fromarray(canny_filter(np.array(pil_image)))

    # One sample per device: ``shard`` reshapes the batch to a leading axis of
    # ``jax.device_count()``, and the PRNG key is split the same way, so a
    # batch of 1 only works when exactly one device is present.
    num_samples = jax.device_count()
    rng = jax.random.split(create_key(0), jax.device_count())

    prompt_ids = pipe.prepare_text_inputs([prompts] * num_samples)
    negative_prompt_ids = pipe.prepare_text_inputs([negative_prompts] * num_samples)
    processed_image = pipe.prepare_image_inputs([canny_image] * num_samples)

    prompt_ids = shard(prompt_ids)
    negative_prompt_ids = shard(negative_prompt_ids)
    processed_image = shard(processed_image)

    output = pipe(
        prompt_ids=prompt_ids,
        image=processed_image,
        params=p_params,
        prng_seed=rng,
        num_inference_steps=50,
        neg_prompt_ids=negative_prompt_ids,
        jit=True,
    ).images

    # Collapse the leading device/batch axes into a single sample axis.
    flat_output = output.reshape((num_samples,) + output.shape[-3:])
    return pipe.numpy_to_pil(np.asarray(flat_output))


gr.Interface(infer, inputs=["text", "text", "image"], outputs="gallery").launch()