# --- Scraped page header (not part of the program); kept as comments so the file parses ---
# Spaces: Running / Running
# File size: 4,686 Bytes
# Revision hashes shown beside the listing:
#   6b394d7 4ba5fc1 6b394d7 dfaed80 6b394d7 68b0b4d 4ba5fc1 6b394d7 4ba5fc1 6b394d7 4ba5fc1
#   6b394d7 4ba5fc1 6b394d7 d6c27f8 6b394d7 dfaed80 6b394d7 dfaed80 6b394d7 d6c27f8 6b394d7
# (The viewer's line-number gutter, 1 through 127, followed here.)
import gradio as gr
import torch
from PIL import Image, ImageDraw, ImageFont, ImageOps, ImageEnhance
from quanto import qfloat8, quantize, freeze
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import StableDiffusionXLControlNetPipeline, UNet2DConditionModel, DiffusionPipeline, LCMScheduler
from diffusers.utils import make_image_grid
# Font used to render the mask text; loaded once at import time from a font
# file expected next to the script, at a point size of 50.
atkbold = ImageFont.truetype("Atkinson-Hyperlegible-Bold-102.otf",50)
# Default output size in pixels; used as the mask factory's default arguments
# and as the initial values of the Width/Height inputs in the UI.
default_width = 1280
default_height = 720
# Initial value of the "Number of Inference Steps" input in the UI.
default_timesteps = 8
def mask_image_factory(mask_text="ASK FOR\nA SNACK", width=default_width, height=default_height):
    """Render ``mask_text`` as a white-on-black ``"L"`` image of ``width`` x ``height``.

    The text is drawn with the module-level ``atkbold`` font, cropped to its
    ink, letterboxed with black to the target aspect ratio and finally
    resampled to exactly ``(width, height)``, so the text fills the frame as
    far as its aspect ratio allows.

    Args:
        mask_text: Text to render; ``"\\n"`` starts a new, centre-aligned line.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        A ``PIL.Image.Image`` in mode ``"L"`` of size ``(width, height)``.
        Blank text yields an all-black image.
    """
    width, height = int(width), int(height)
    text_kwargs = dict(text=mask_text, font=atkbold, align="center", spacing=0)

    # Measure first and size the scratch canvas to the text itself, so long
    # or many-line text is never clipped by the requested output size.
    probe = ImageDraw.Draw(Image.new("L", (1, 1), 0))
    left, top, right, bottom = probe.multiline_textbbox((0, 0), **text_kwargs)
    margin = 2  # slack for rounding / anti-aliasing at the edges
    canvas = Image.new(
        "L",
        (
            max(1, int(right - left) + 2 * margin),
            max(1, int(bottom - top) + 2 * margin),
        ),
        0,
    )
    ImageDraw.Draw(canvas).multiline_text(
        xy=(margin - left, margin - top),
        fill=255,
        **text_kwargs,
    )

    ink_box = canvas.getbbox()
    if ink_box is None:
        # Nothing was drawn (empty or whitespace-only text).
        return Image.new("L", (width, height), 0)
    cropped = canvas.crop(ink_box)
    crop_w, crop_h = cropped.size

    # Grow (never shrink) the cropped text to the target aspect ratio: keep
    # the dimension that is already limiting and extend the other one with
    # black padding. Shrinking here would throw away resolution that the
    # final resize then has to invent again.
    target_ratio = width / height
    if crop_w / crop_h > target_ratio:
        # Text is relatively wider than the frame: pad above and below.
        pad_size = (crop_w, max(crop_h, round(crop_w / target_ratio)))
    else:
        # Text is relatively taller than (or equal to) the frame: pad the sides.
        pad_size = (max(crop_w, round(crop_h * target_ratio)), crop_h)

    padded = ImageOps.pad(cropped, pad_size, color=0)
    return padded.resize((width, height), resample=Image.Resampling.LANCZOS)
# Pick the compute device: CUDA if available, otherwise Apple MPS if
# available, otherwise fall back to the CPU.
preferred_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
# preferred_device = "cpu"
# Full precision on the CPU, half precision on any other device.
preferred_dtype = torch.float32 if preferred_device == "cpu" else torch.float16
# Load the ControlNet weights from the named pretrained checkpoint and move
# them to the selected device. The commented-out arguments are earlier
# alternatives that are currently disabled.
controlnet = ControlNetModel.from_pretrained(
# "monster-labs/control_v1p_sd15_qrcode_monster",
"monster-labs/control_v1p_sdxl_qrcode_monster",
# subfolder="v2",
torch_dtype=preferred_dtype,
#torch_dtype=unet_preferred_dtype
).to(preferred_device)
# Weight quantization of the ControlNet is currently disabled.
#quantize(controlnet, weights=qfloat8)
#freeze(controlnet)
# Load the UNet from the "latent-consistency/lcm-sdxl" checkpoint. The "fp16"
# weight variant is requested regardless of preferred_dtype.
unet = UNet2DConditionModel.from_pretrained(
"latent-consistency/lcm-sdxl",
torch_dtype=preferred_dtype,
variant="fp16",
).to(preferred_device)
# Build the SDXL ControlNet pipeline from the base checkpoint, substituting
# the UNet and ControlNet loaded above, and move it to the selected device.
ctlpipe = StableDiffusionXLControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
unet=unet,
controlnet=controlnet,
torch_dtype=preferred_dtype,
safety_checker=None,
).to(preferred_device)
# Replace the pipeline's scheduler with an LCMScheduler built from the
# existing scheduler's configuration.
ctlpipe.scheduler = LCMScheduler.from_config(ctlpipe.scheduler.config)
# Quantization of the pipeline's UNet and text encoder is currently disabled.
#quantize(ctlpipe.unet, weights=qfloat8)
#freeze(ctlpipe.unet)
#quantize(ctlpipe.text_encoder, weights=qfloat8)
#freeze(ctlpipe.text_encoder)
def app(prompt, negative_prompt, mask_text, num_inference_steps, controlnet_conditioning_scale, width, height, seed, count):
    """Generate ``count`` images conditioned on a text mask and return them as one grid.

    Image ``i`` is generated with seed ``seed + i``, so consecutive seeds are
    used for the images in a batch.

    Args:
        prompt: Positive text prompt passed to the pipeline.
        negative_prompt: Negative text prompt passed to the pipeline.
        mask_text: Text rendered into the ControlNet conditioning image.
        num_inference_steps: Number of denoising steps (coerced to ``int``).
        controlnet_conditioning_scale: ControlNet strength (coerced to ``float``).
        width: Output width in pixels (coerced to ``int``).
        height: Output height in pixels (coerced to ``int``).
        seed: Seed for the first image.
        count: Number of images to generate; must be at least 1.

    Returns:
        A single PIL image containing all generated images laid out in a grid.

    Raises:
        ValueError: If ``count`` is smaller than 1.
    """
    width, height, count = int(width), int(height), int(count)
    if count < 1:
        raise ValueError(f"count must be at least 1, got {count}")

    # The conditioning image depends only on the text and size, so build it
    # once instead of once per generated image.
    mask = mask_image_factory(mask_text=mask_text, width=width, height=height)

    all_images = [
        ctlpipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=mask,
            num_inference_steps=int(num_inference_steps),
            guidance_scale=8.0,
            controlnet_conditioning_scale=float(controlnet_conditioning_scale),
            generator=torch.manual_seed(int(seed + i)),
            height=height,
            width=width,
        ).images[0]
        for i in range(count)
    ]

    # Layout: a single column for 1, 2 or an odd number of images; two
    # columns for even counts above 2. rows * cols always equals count.
    cols = 2 if count > 2 and count % 2 == 0 else 1
    rows = count // cols
    return make_image_grid(all_images, cols=cols, rows=rows)
# Warm-up: run one single-step generation at the default size before the UI
# starts. The result is discarded. Keyword arguments are used so width and
# height cannot be passed in the wrong order.
app(
    prompt="corgis running in the park",
    negative_prompt="ugly, wrong",
    mask_text="ASK FOR\nA SNACK",
    num_inference_steps=1,
    controlnet_conditioning_scale=1.0,
    width=default_width,
    height=default_height,
    seed=42,
    count=1,
)

# Gradio UI: the inputs are listed in the same order as app()'s parameters,
# and the single output is the image grid that app() returns.
iface = gr.Interface(
    app,
    [
        gr.Textbox(label="Prompt", value="lots of puppies frolicking in a flower-filled meadow around tall trees at golden hour"),
        gr.Textbox(label="Negative Prompt", value="ugly, wrong"),
        gr.Textbox(label="Mask Text", value="ASK FOR\nA SNACK"),
        gr.Number(label="Number of Inference Steps", value=default_timesteps, minimum=1, maximum=50, step=1),
        gr.Slider(label="ControlNet Conditioning Scale", value=0.57, minimum=-1.0, maximum=2.0, step=0.01),
        gr.Number(label="Width", value=default_width, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Height", value=default_height, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Random Number Seed", value=42, minimum=0, maximum=2**32-1, precision=0),
        gr.Radio(label="Number of Images to Generate with Subsequent Consecutive Seeds", choices=[1, 2, 4, 6, 10], value=2),
    ],
    "image",
)
iface.launch()
|