"""Gradio demo that hides a text message inside SDXL-generated images.

The text is rendered into a grayscale mask, which is fed as the ControlNet
conditioning image to an SDXL pipeline whose UNet is swapped for the LCM
variant so that only a few inference steps are needed.
"""

import gradio as gr
import torch
from PIL import Image, ImageDraw, ImageFont, ImageOps, ImageEnhance
from quanto import qfloat8, quantize, freeze
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import (
    StableDiffusionXLControlNetPipeline,
    UNet2DConditionModel,
    DiffusionPipeline,
    LCMScheduler,
)
from diffusers.utils import make_image_grid

# Font used to render the mask text; the file must sit next to this script.
atkbold = ImageFont.truetype("Atkinson-Hyperlegible-Bold-102.otf", 50)

default_width = 1280
default_height = 720
default_timesteps = 8


def mask_image_factory(mask_text="ASK FOR\nA SNACK", width=default_width, height=default_height):
    """Render *mask_text* as a white-on-black mask of size ``width`` x ``height``.

    The text is drawn with the module-level font, cropped to its bounding
    box, padded to the target aspect ratio and finally scaled up to the
    requested size.

    Args:
        mask_text: Text to render; ``\\n`` starts a new (centered) line.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        A mode ``"L"`` PIL image of size ``(width, height)``.
    """
    width, height = int(width), int(height)

    canvas = Image.new("L", (width, height), 0)
    ImageDraw.Draw(canvas).multiline_text(
        xy=(0, 0),
        text=mask_text,
        fill=255,
        font=atkbold,
        align="center",
        spacing=0,
    )

    # getbbox() is None for an all-black canvas (e.g. empty text); crop(None)
    # then simply copies the whole canvas.
    cropped = canvas.crop(canvas.getbbox())
    crop_w, crop_h = cropped.size
    target_ratio = width / height

    # Grow (never shrink) the short side so the padded canvas has the target
    # aspect ratio. Shrinking here would make ImageOps.pad downscale the
    # text before the final upscale and throw away detail.
    if crop_w / crop_h > target_ratio:
        # Text is relatively wider than the output: keep width, add height.
        padded_w = crop_w
        padded_h = max(crop_h, round(crop_w / target_ratio))
    else:
        # Text is relatively taller than the output: keep height, add width.
        padded_w = max(crop_w, round(crop_h * target_ratio))
        padded_h = crop_h

    padded = ImageOps.pad(cropped, (padded_w, padded_h))
    return padded.resize((width, height), resample=Image.Resampling.LANCZOS)


# Prefer CUDA, then Apple MPS, then fall back to CPU.
preferred_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
# preferred_device = "cpu"
# Half precision everywhere except on CPU.
preferred_dtype = torch.float32 if preferred_device == "cpu" else torch.float16

controlnet = ControlNetModel.from_pretrained(
    # "monster-labs/control_v1p_sd15_qrcode_monster",
    "monster-labs/control_v1p_sdxl_qrcode_monster",
    # subfolder="v2",
    torch_dtype=preferred_dtype,
    # torch_dtype=unet_preferred_dtype
).to(preferred_device)
# quantize(controlnet, weights=qfloat8)
# freeze(controlnet)

unet = UNet2DConditionModel.from_pretrained(
    "latent-consistency/lcm-sdxl",
    torch_dtype=preferred_dtype,
    variant="fp16",
).to(preferred_device)

ctlpipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    unet=unet,
    controlnet=controlnet,
    torch_dtype=preferred_dtype,
    safety_checker=None,
).to(preferred_device)
ctlpipe.scheduler = LCMScheduler.from_config(ctlpipe.scheduler.config)
# quantize(ctlpipe.unet, weights=qfloat8)
# freeze(ctlpipe.unet)
# quantize(ctlpipe.text_encoder, weights=qfloat8)
# freeze(ctlpipe.text_encoder)


def app(prompt, negative_prompt, mask_text, num_inference_steps, controlnet_conditioning_scale, width, height, seed, count):
    """Generate ``count`` images conditioned on *mask_text* and tile them.

    Image ``i`` is generated with seed ``seed + i``.

    Args:
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        mask_text: Text rendered into the ControlNet conditioning image.
        num_inference_steps: Number of scheduler steps per image.
        controlnet_conditioning_scale: Strength of the ControlNet guidance.
        width: Output width in pixels of each image.
        height: Output height in pixels of each image.
        seed: Seed of the first image.
        count: Number of images to generate.

    Returns:
        A single PIL image containing all generated images in a grid.
    """
    # UI number widgets may hand over floats; the pipeline, PIL and range()
    # all need ints.
    width, height, seed, count = int(width), int(height), int(seed), int(count)

    # The conditioning image does not depend on the seed, so build it once.
    mask = mask_image_factory(mask_text=mask_text, width=width, height=height)

    all_images = [
        ctlpipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=mask,
            num_inference_steps=int(num_inference_steps),
            guidance_scale=8.0,
            controlnet_conditioning_scale=float(controlnet_conditioning_scale),
            generator=torch.manual_seed(seed + i),
            height=height,
            width=width,
        ).images[0]
        for i in range(count)
    ]

    # Two columns for even counts above 2, otherwise a single column.
    cols = 2 if count > 2 and count % 2 == 0 else 1
    rows = count // cols
    return make_image_grid(all_images, cols=cols, rows=rows)


# Warm-up run: one image, one step, at the default resolution.
app(
    "corgis running in the park",
    "ugly, wrong",
    "ASK FOR\nA SNACK",
    1,
    1.0,
    width=default_width,
    height=default_height,
    seed=42,
    count=1,
)

iface = gr.Interface(
    app,
    [
        gr.Textbox(label="Prompt", value="lots of puppies frolicking in a flower-filled meadow around tall trees at golden hour"),
        gr.Textbox(label="Negative Prompt", value="ugly, wrong"),
        gr.Textbox(label="Mask Text", value="ASK FOR\nA SNACK"),
        gr.Number(label="Number of Inference Steps", value=default_timesteps, minimum=1, maximum=50, step=1),
        gr.Slider(label="ControlNet Conditioning Scale", value=0.57, minimum=-1.0, maximum=2.0, step=0.01),
        gr.Number(label="Width", value=default_width, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Height", value=default_height, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Random Number Seed", value=42, minimum=0, maximum=2**32-1, precision=0),
        gr.Radio(label="Number of Images to Generate with Subsequent Consecutive Seeds", choices=[1, 2, 4, 6, 10], value=2),
    ],
    "image",
)
iface.launch()