import sys
sys.path.append('./')
import gradio as gr
import spaces
import os
import subprocess
import numpy as np
from PIL import Image
import cv2
import torch
import random
from transformers import pipeline
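# Install the bundled controlnet_aux package in editable mode so its annotators can be imported below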
os.system("pip install -e ./controlnet_aux")
from controlnet_aux import OpenposeDetector, CannyDetector
from depth_anything_v2.dpt import DepthAnythingV2
from huggingface_hub import hf_hub_download, login
hf_token = os.environ.get("HF_TOKEN_GATED")
if hf_token:
    login(token=hf_token)
MAX_SEED = np.iinfo(np.int32).max
# Translator setup: Korean prompts are translated to English before generation
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
def translate_to_english(text):
    if any('\uAC00' <= char <= '\uD7A3' for char in text):
        return translator(text, max_length=512)[0]['translation_text']
    return text
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed
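# Depth Anything V2 (ViT-L) is loaded to extract depth maps for the "Depth" control mode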
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}
encoder = 'vitl'
model = DepthAnythingV2(**model_configs[encoder])
filepath = hf_hub_download(repo_id="depth-anything/Depth-Anything-V2-Large", filename="depth_anything_v2_vitl.pth", repo_type="model")
state_dict = torch.load(filepath, map_location="cpu")
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()
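# Load FLUX.1-dev with the Shakker-Labs ControlNet Union Pro, wrapped as a multi-ControlNet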
from diffusers.utils import load_image
from diffusers import FluxControlNetPipeline, FluxControlNetModel
from diffusers.models import FluxMultiControlNetModel
base_model = 'black-forest-labs/FLUX.1-dev'
controlnet_model = 'Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro'
controlnet = FluxControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)
controlnet = FluxMultiControlNetModel([controlnet])
pipe = FluxControlNetPipeline.from_pretrained(base_model, controlnet=controlnet, torch_dtype=torch.bfloat16)
pipe.to("cuda")
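# Map UI mode names to the ControlNet Union Pro control-mode indices
# (0: canny, 1: tile, 2: depth, 3: blur, 4: pose, 5: gray, 6: low quality) and to default strengths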
mode_mapping = {"Canny": 0, "Tile": 1, "Depth": 2, "Blur": 3, "OpenPose": 4, "Grayscale": 5, "Low quality": 6}
strength_mapping = {"Canny": 0.65, "Tile": 0.45, "Depth": 0.55, "Blur": 0.45, "OpenPose": 0.55, "Grayscale": 0.45, "Low quality": 0.4}
canny = CannyDetector()
open_pose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
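# Memory/throughput tweaks: TF32 matmuls, VAE tiling/slicing, and model CPU offload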
torch.backends.cuda.matmul.allow_tf32 = True
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()
pipe.enable_model_cpu_offload() # for saving memory
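# Preprocessing helpers: each turns an input image into the control image for one mode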
def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
def extract_depth(image):
    image = np.asarray(image)
    depth = model.infer_image(image[:, :, ::-1])
    depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
    depth = depth.astype(np.uint8)
    gray_depth = Image.fromarray(depth).convert('RGB')
    return gray_depth
def extract_openpose(img):
    processed_image_open_pose = open_pose(img, hand_and_face=True)
    return processed_image_open_pose
def extract_canny(image):
    processed_image_canny = canny(image)
    return processed_image_canny
def apply_gaussian_blur(image, kernel_size=(21, 21)):
    image = convert_from_image_to_cv2(image)
    blurred_image = convert_from_cv2_to_image(cv2.GaussianBlur(image, kernel_size, 0))
    return blurred_image
def convert_to_grayscale(image):
    image = convert_from_image_to_cv2(image)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Convert back to 3 channels so the shared BGR->RGB helper returns an RGB image
    gray_image = convert_from_cv2_to_image(cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR))
    return gray_image
def add_gaussian_noise(image, mean=0, sigma=10):
    image = convert_from_image_to_cv2(image)
    noise = np.random.normal(mean, sigma, image.shape)
    noisy_image = convert_from_cv2_to_image(np.clip(image.astype(np.float32) + noise, 0, 255).astype(np.uint8))
    return noisy_image
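# Resize so the short side equals `resolution`, snapping both sides to multiples of 64 (used for the Tile mode)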
def tile(input_image, resolution=768):
    input_image = convert_from_image_to_cv2(input_image)
    H, W, C = input_image.shape
    H = float(H)
    W = float(W)
    k = float(resolution) / min(H, W)
    H *= k
    W *= k
    H = int(np.round(H / 64.0)) * 64
    W = int(np.round(W / 64.0)) * 64
    img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
    img = convert_from_cv2_to_image(img)
    return img
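# Resize the reference image to fit max_side/min_side with both sides snapped to multiples of base_pixel_number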
def resize_img(input_image, max_side=768, min_side=512, size=None,
               pad_to_max_side=False, mode=Image.BILINEAR, base_pixel_number=64):
    w, h = input_image.size
    if size is not None:
        w_resize_new, h_resize_new = size
    else:
        ratio = min_side / min(h, w)
        w, h = round(ratio*w), round(ratio*h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)
    if pad_to_max_side:
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
        input_image = Image.fromarray(res)
    return input_image
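# Main generation entry point: build the control image (or use the uploaded one), then run the FLUX ControlNet pipeline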
@spaces.GPU()
def infer(cond_in, image_in, prompt, inference_steps, guidance_scale, control_mode, control_strength, seed, progress=gr.Progress(track_tqdm=True)):
    control_mode_num = mode_mapping[control_mode]
    prompt = translate_to_english(prompt)
    if cond_in is None:
        if image_in is not None:
            image_in = resize_img(load_image(image_in))
            if control_mode == "Canny":
                control_image = extract_canny(image_in)
            elif control_mode == "Depth":
                control_image = extract_depth(image_in)
            elif control_mode == "OpenPose":
                control_image = extract_openpose(image_in)
            elif control_mode == "Blur":
                control_image = apply_gaussian_blur(image_in)
            elif control_mode == "Low quality":
                control_image = add_gaussian_noise(image_in)
            elif control_mode == "Grayscale":
                control_image = convert_to_grayscale(image_in)
            elif control_mode == "Tile":
                control_image = tile(image_in)
        else:
            raise gr.Error("Please upload either a preprocessed control image or a reference image.")
    else:
        control_image = resize_img(load_image(cond_in))
    width, height = control_image.size
    image = pipe(
        prompt,
        control_image=[control_image],
        control_mode=[control_mode_num],
        width=width,
        height=height,
        controlnet_conditioning_scale=[control_strength],
        num_inference_steps=inference_steps,
        guidance_scale=guidance_scale,
        generator=torch.manual_seed(seed),
    ).images[0]
    torch.cuda.empty_cache()
    return image, control_image
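# Hide the default Gradio footer via custom CSS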
css = """
footer {
visibility: hidden;
}
"""
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
    with gr.Column(elem_id="col-container"):
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Row(equal_height=True):
                        cond_in = gr.Image(label="Upload a preprocessed control image", sources=["upload"], type="filepath")
                        image_in = gr.Image(label="Extract the condition from a reference image (optional)", sources=["upload"], type="filepath")
                    prompt = gr.Textbox(label="Prompt", value="best quality")
                    with gr.Accordion("ControlNet"):
                        control_mode = gr.Radio(
                            ["Canny", "Depth", "OpenPose", "Grayscale", "Blur", "Tile", "Low quality"], label="Mode", value="Grayscale",
                            info="Select the control mode; it is applied to every image"
                        )
                        control_strength = gr.Slider(
                            label="Control strength",
                            minimum=0,
                            maximum=1.0,
                            step=0.05,
                            value=0.50,
                        )
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=42,
                        )
                        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                    with gr.Accordion("Advanced settings", open=False):
                        with gr.Column():
                            with gr.Row():
                                inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=50, step=1, value=24)
                                guidance_scale = gr.Slider(label="Guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=3.5)
                    submit_btn = gr.Button("Submit")
                with gr.Column():
                    result = gr.Image(label="Result")
                    processed_cond = gr.Image(label="Preprocessed condition")

    submit_btn.click(
        fn=randomize_seed_fn,
        inputs=[seed, randomize_seed],
        outputs=seed,
        queue=False,
        api_name=False
    ).then(
        fn=infer,
        inputs=[cond_in, image_in, prompt, inference_steps, guidance_scale, control_mode, control_strength, seed],
        outputs=[result, processed_cond],
        show_api=False
    )
demo.queue(api_open=False)
demo.launch()