# Source: Hugging Face Space "sd3-ControlNet", file app.py (uploaded by fffiloni).
# Commit 5672e0b ("Update app.py"), verified; file size 5.65 kB.
import gradio as gr
import spaces
import os
import sys
import subprocess
import numpy as np
from PIL import Image
import cv2
import torch
from diffusers import StableDiffusion3ControlNetPipeline
from diffusers.models import SD3ControlNetModel, SD3MultiControlNetModel
from diffusers.utils import load_image
# Load both InstantX SD3 ControlNet checkpoints once, at import time. The full
# Stable Diffusion 3 pipelines are assembled around them later, on demand.
controlnet_canny = SD3ControlNetModel.from_pretrained(
    "InstantX/SD3-Controlnet-Canny",
)
controlnet_tile = SD3ControlNetModel.from_pretrained(
    "InstantX/SD3-Controlnet-Tile",
)
def resize_image(input_path, output_path, target_height):
    """Resize an image file to a fixed height, keeping its aspect ratio.

    Args:
        input_path: Path of the image to read.
        output_path: Path the resized image is written to. The format is
            chosen by Pillow from the file extension.
        target_height: Height, in pixels, of the resized image.

    Returns:
        A ``(output_path, new_width, target_height)`` tuple, where
        ``new_width`` is the width that preserves the original aspect ratio.
    """
    # The context manager guarantees the underlying file handle is released
    # even if resizing fails.
    with Image.open(input_path) as img:
        original_width, original_height = img.size
        original_aspect_ratio = original_width / original_height
        # Clamp to 1 so an extremely tall, narrow image cannot yield a
        # zero-width (invalid) target size.
        new_width = max(1, int(target_height * original_aspect_ratio))
        resized = img.resize((new_width, target_height), Image.LANCZOS)

    # JPEG cannot store alpha or palette data; convert so that saving e.g. an
    # RGBA PNG upload to a ".jpg" path does not raise OSError.
    is_jpeg = str(output_path).lower().endswith((".jpg", ".jpeg"))
    if is_jpeg and resized.mode not in ("RGB", "L", "CMYK"):
        resized = resized.convert("RGB")

    resized.save(output_path)
    return output_path, new_width, target_height
def show_hidden():
    """Return a Gradio update that makes the receiving component visible."""
    reveal = gr.update(visible=True)
    return reveal
def load_pipeline(control_type, progress=gr.Progress(track_tqdm=True)):
    """Make sure the SD3 ControlNet pipeline for ``control_type`` is loaded.

    The pipeline is stored in the module-level global ``pipe_canny`` or
    ``pipe_tile``. It is built only the first time it is requested; later
    calls reuse the existing object instead of reloading the whole model.

    Args:
        control_type: Either ``"canny"`` or ``"tile"``.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        A Gradio update that shows the text "pipeline ready".

    Raises:
        gr.Error: If ``control_type`` is not a supported value.
    """
    global pipe_canny, pipe_tile
    base_model = "stabilityai/stable-diffusion-3-medium-diffusers"

    # The globals do not exist until the first load, hence globals().get().
    if control_type == "canny":
        if globals().get("pipe_canny") is None:
            pipe_canny = StableDiffusion3ControlNetPipeline.from_pretrained(
                base_model,
                controlnet=controlnet_canny,
            )
    elif control_type == "tile":
        if globals().get("pipe_tile") is None:
            pipe_tile = StableDiffusion3ControlNetPipeline.from_pretrained(
                base_model,
                controlnet=controlnet_tile,
            )
    else:
        # Do not report "pipeline ready" when nothing was loaded.
        raise gr.Error(f"Unsupported control type: {control_type!r}")

    return gr.update(value="pipeline ready", visible=True)
@spaces.GPU(duration=90)
def infer(image_in, prompt, control_type, inference_steps, guidance_scale, control_weight, progress=gr.Progress(track_tqdm=True)):
    """Generate an image with the SD3 ControlNet pipeline for ``control_type``.

    Args:
        image_in: File path of the reference image.
        prompt: Text prompt.
        control_type: ``"canny"`` (edges are extracted from the reference
            image) or ``"tile"`` (the reference image is used directly).
        inference_steps: Number of denoising steps.
        guidance_scale: Classifier-free guidance scale.
        control_weight: ControlNet conditioning scale.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        A ``(image, update)`` tuple: the generated image and a Gradio update
        for the "Preprocessed Canny" preview (shown with the edge map for
        canny, hidden for tile).

    Raises:
        gr.Error: If no image was supplied, the control type is unsupported,
            or the matching pipeline has not been loaded yet.
    """
    if image_in is None:
        raise gr.Error("Please upload a reference image first.")

    # The pipelines live in module globals that only exist once they have
    # been loaded, so look them up defensively.
    loaded_pipes = {
        "canny": globals().get("pipe_canny"),
        "tile": globals().get("pipe_tile"),
    }
    if control_type not in loaded_pipes:
        raise gr.Error(f"Unsupported control type: {control_type!r}")
    pipe = loaded_pipes[control_type]
    if pipe is None:
        raise gr.Error(f"The {control_type} pipeline has not been loaded yet.")

    n_prompt = 'NSFW, nude, naked, porn, ugly'
    pipe.to("cuda", torch.float16)

    source_image = load_image(image_in)
    if control_type == "canny":
        # Canny preprocessing: single-channel edge map replicated to 3 channels.
        edges = cv2.Canny(np.array(source_image), 100, 200)
        edges = edges[:, :, None]
        control_image = Image.fromarray(np.concatenate([edges, edges, edges], axis=2))
    else:
        control_image = source_image

    # infer
    image = pipe(
        prompt=prompt,
        negative_prompt=n_prompt,
        control_image=control_image,
        controlnet_conditioning_scale=control_weight,
        num_inference_steps=inference_steps,
        guidance_scale=guidance_scale,
    ).images[0]

    if control_type == "canny":
        # Bring the result back to the reference aspect ratio at height 1024.
        # The size is computed in memory from the image that was actually fed
        # to the pipeline; writing a temporary JPEG just to obtain dimensions
        # would race between concurrent requests and fail on RGBA uploads.
        src_width, src_height = source_image.size
        target_height = 1024
        target_width = max(1, int(target_height * (src_width / src_height)))
        image = image.resize((target_width, target_height), Image.LANCZOS)
        return image, gr.update(value=control_image, visible=True)

    return image, gr.update(value=None, visible=False)
# Page-level CSS: centre the main column and cap its width.
css = """
#col-container{
margin: 0 auto;
max-width: 1080px;
}
"""

# NOTE(review): the layout nesting below was reconstructed from a listing
# without indentation -- confirm it against the deployed Space.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
# SD3 ControlNet
Experiment with Stable Diffusion 3 ControlNet models proposed and maintained by the InstantX team.<br />
""")
        with gr.Column():
            with gr.Row():
                # Left column: inputs and generation settings.
                with gr.Column():
                    image_in = gr.Image(
                        label="Image reference",
                        sources=["upload"],
                        type="filepath",
                    )
                    prompt = gr.Textbox(label="Prompt")
                    control_type = gr.Radio(
                        label="Control type",
                        choices=["canny", "tile"],
                        value="canny",
                    )
                    with gr.Accordion("Advanced settings", open=False):
                        with gr.Column():
                            with gr.Row():
                                inference_steps = gr.Slider(
                                    label="Inference steps",
                                    minimum=1,
                                    maximum=50,
                                    step=1,
                                    value=25,
                                )
                                guidance_scale = gr.Slider(
                                    label="Guidance scale",
                                    minimum=1.0,
                                    maximum=10.0,
                                    step=0.1,
                                    value=7.0,
                                )
                            control_weight = gr.Slider(
                                label="Control Weight",
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                value=0.7,
                            )
                    submit_canny_btn = gr.Button("Submit")
                # Right column: status box, generated image, canny preview.
                with gr.Column():
                    models = gr.Textbox(label="Plug-in pipes", visible=False)
                    result = gr.Image(label="Result")
                    canny_used = gr.Image(label="Preprocessed Canny", visible=False)

    # Submit: reveal the status box, load the selected pipeline, then generate.
    submit_canny_btn.click(
        fn=show_hidden,
        inputs=None,
        outputs=[models],
    ).then(
        fn=load_pipeline,
        inputs=[control_type],
        outputs=[models],
    ).then(
        fn=infer,
        inputs=[
            image_in,
            prompt,
            control_type,
            inference_steps,
            guidance_scale,
            control_weight,
        ],
        outputs=[result, canny_used],
        show_api=False,
    )

demo.queue().launch()