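"""Text-mask ControlNet demo.

Renders a short text string to a white-on-black mask, conditions the
monster-labs "QR Code Monster" ControlNet for SDXL on that mask over an
LCM-distilled SDXL base model, and serves the result through a Gradio UI.
"""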
import gradio as gr
import torch
from PIL import Image, ImageDraw, ImageFont, ImageOps
from quanto import qfloat8, quantize, freeze  # only used by the optional quantization blocks below
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, UNet2DConditionModel, LCMScheduler
from diffusers.utils import make_image_grid

# Local font file; Atkinson Hyperlegible Bold must be present next to this script.
atkbold = ImageFont.truetype("Atkinson-Hyperlegible-Bold-102.otf", 50)

# Output size (720p) and the inference-step default used by the UI below.
default_width = 1280
default_height = 720
default_timesteps = 8

def mask_image_factory(mask_text="ASK FOR\nA SNACK", width=default_width, height=default_height):
    """Render mask_text in white on black, crop to the text, pad back out to the
    target aspect ratio, and resize to the requested dimensions."""
    img = Image.new("L", (width, height), (0,))
    draw = ImageDraw.Draw(img)
    draw.multiline_text(
        xy=(0, 0),
        text=mask_text,
        fill=(255,),
        font=atkbold,
        align="center",
        spacing=0,
    )
    # Crop to the text's bounding box, then compare aspect ratios to decide
    # which dimension needs black padding.
    cropped = img.crop(img.getbbox())
    image_aspect_ratio = width / height
    cropped_aspect_ratio = cropped.size[0] / cropped.size[1]

    if cropped_aspect_ratio > image_aspect_ratio:
        # Text block is wider than the target: keep its width, extend the height.
        new_width = cropped.size[0]
        new_height = int(cropped.size[0] / image_aspect_ratio)
    else:
        # Text block is taller or narrower than the target: keep its height, extend the width.
        new_width = int(cropped.size[1] * image_aspect_ratio)
        new_height = cropped.size[1]

    # Pad with black to the target aspect ratio, then resize to the final size.
    padded = ImageOps.pad(cropped, (new_width, new_height))
    resized = padded.resize((width, height), resample=Image.Resampling.LANCZOS)
    return resized
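# Quick sanity check of the mask rendering (hypothetical usage, not wired into
# the app): uncomment to write the mask to disk and inspect it.
#   mask_image_factory("ASK FOR\nA SNACK", 1280, 720).save("mask_preview.png")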

# Prefer CUDA, then Apple MPS, then CPU; fp16 on accelerators, fp32 on CPU
# (half-precision inference is poorly supported on CPU).
preferred_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
# preferred_device = "cpu"  # uncomment to force CPU
preferred_dtype = torch.float32 if preferred_device == "cpu" else torch.float16

# QR Code Monster ControlNet, SDXL variant (the SD1.5 variant is kept for reference).
controlnet = ControlNetModel.from_pretrained(
    # "monster-labs/control_v1p_sd15_qrcode_monster",  # SD1.5 variant
    # subfolder="v2",                                  # (v2 weights of the SD1.5 variant)
    "monster-labs/control_v1p_sdxl_qrcode_monster",
    torch_dtype=preferred_dtype,
).to(preferred_device)

# Optional: qfloat8 weight quantization via quanto to cut memory use (disabled).
#quantize(controlnet, weights=qfloat8)
#freeze(controlnet)

# LCM-distilled SDXL UNet: a latent-consistency model that produces usable
# images in a handful of inference steps.
unet = UNet2DConditionModel.from_pretrained(
    "latent-consistency/lcm-sdxl",
    torch_dtype=preferred_dtype,
    variant="fp16",
).to(preferred_device)

# Note: SDXL pipelines take no safety_checker argument, so none is passed here.
ctlpipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    unet=unet,
    controlnet=controlnet,
    torch_dtype=preferred_dtype,
).to(preferred_device)

# The LCM UNet must be paired with the matching LCMScheduler.
ctlpipe.scheduler = LCMScheduler.from_config(ctlpipe.scheduler.config)

# Optional: quantize the pipeline's UNet and text encoder as well (disabled).
#quantize(ctlpipe.unet, weights=qfloat8)
#freeze(ctlpipe.unet)
#quantize(ctlpipe.text_encoder, weights=qfloat8)
#freeze(ctlpipe.text_encoder)

def app(prompt, negative_prompt, mask_text, num_inference_steps, controlnet_conditioning_scale, width, height, seed, count):
    """Generate `count` images with consecutive seeds and return them as one grid."""
    all_images = [ctlpipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=mask_image_factory(mask_text=mask_text, width=width, height=height),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=8.0,
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        generator=torch.manual_seed(int(seed) + i),  # consecutive seeds: seed, seed+1, ...
        height=height,
        width=width,
    ).images[0] for i in range(count)]
    # Grid layout: two columns for even counts above two, one column otherwise.
    if count == 1:
        cols, rows = 1, 1
    elif count == 2:
        cols, rows = 1, 2
    else:
        cols = 2 if count % 2 == 0 else 1
        rows = count // cols
    return make_image_grid(all_images, cols=cols, rows=rows)


app("corgis running in the park", "ugly, wrong", "ASK FOR\nA SNACK", 1, 1.0, default_height, default_width, 42, 1)

iface = gr.Interface(
    app,
    [
        gr.Textbox(label="Prompt", value="lots of puppies frolicking in a flower-filled meadow around tall trees at golden hour"),
        gr.Textbox(label="Negative Prompt", value="ugly, wrong"),
        gr.Textbox(label="Mask Text", value="ASK FOR\nA SNACK"),
        gr.Number(label="Number of Inference Steps", value=default_timesteps, minimum=1, maximum=50, step=1),
        gr.Slider(label="ControlNet Conditioning Scale", value=0.57, minimum=-1.0, maximum=2.0, step=0.01),
        gr.Number(label="Width", value=default_width, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Height", value=default_height, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Random Number Seed", value=42, minimum=0, maximum=2**32-1, precision=0),
        gr.Radio(label="Number of Images to Generate (with consecutive seeds)", choices=[1, 2, 4, 6, 10], value=2),
    ],
    "image",
)

iface.launch()
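# Note: iface.launch() binds to localhost by default; Gradio's share=True
# option would expose a temporary public URL if remote access is needed.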