File size: 9,363 Bytes
60e529d
 
 
a503b81
60e529d
 
 
a503b81
60e529d
a503b81
60e529d
 
 
 
333093e
60e529d
 
 
0da8a10
 
 
 
 
 
60e529d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333093e
60e529d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# adapted from https://huggingface.co/spaces/HumanAIGC/OutfitAnyone/blob/main/app.py
import torch
import spaces
import gradio as gr
from PIL import Image
import numpy as np
from torchvision import transforms

from transformers import CLIPTextModel, CLIPTokenizer

from diffusers import UniPCMultistepScheduler
from diffusers import AutoencoderKL
from diffusers import StableDiffusionPipeline
from diffusers.loaders import LoraLoaderMixin

import os
from os.path import join as opj

import subprocess
import sys

# The StableGarment package is installed from its GitHub repository at startup,
# before the `stablegarment` imports below. ACCESS_TOKEN, when set, is embedded
# in the URL as the GitHub credential; when unset the plain URL is used instead
# of producing a bogus "None@github.com" host.
token = os.getenv("ACCESS_TOKEN")
_repo_host = f"{token}@github.com" if token else "github.com"
# An argument list (no shell) keeps the token away from shell parsing, and
# sys.executable makes pip install into the interpreter running this app.
_install = subprocess.run(
    [sys.executable, "-m", "pip", "install",
     f"git+https://{_repo_host}/logn-2024/StableGarment.git"],
    check=False,
)
if _install.returncode != 0:
    # Stay best-effort like the previous os.system call: an already-installed
    # copy may still satisfy the imports below, so warn instead of aborting.
    # The URL is deliberately not printed because it may contain the token.
    print(f"warning: installing StableGarment failed (pip exit code {_install.returncode})",
          file=sys.stderr)

from stablegarment.models import AppearanceEncoderModel,ControlNetModel
from stablegarment.piplines import StableGarmentPipeline,StableGarmentControlNetPipeline

# Run on the GPU in half precision when CUDA is available, otherwise on the
# CPU in full precision.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if "cuda"==device else torch.float32
# Working resolution in pixels; used below both for resizing inputs and as the
# height/width passed to the pipelines.
height = 512
width = 384

# Base Stable Diffusion checkpoint. The VAE is loaded separately and handed to
# the pipeline (presumably because the "noVAE" checkpoint ships without one —
# TODO confirm).
base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse").to(dtype=torch_dtype,device=device)
# UniPC scheduler built from the scheduler config of the SD 1.5 repository.
scheduler = UniPCMultistepScheduler.from_pretrained("runwayml/stable-diffusion-v1-5",subfolder="scheduler")

# Garment (appearance) encoder, passed to the pipelines as `garment_encoder`.
pretrained_garment_encoder_path = "StableGarment_text2img"
garment_encoder = AppearanceEncoderModel.from_pretrained(pretrained_garment_encoder_path,torch_dtype=torch_dtype,subfolder="garment_encoder")
garment_encoder = garment_encoder.to(device=device,dtype=torch_dtype)

# Text-to-image pipeline; its default scheduler is replaced by the UniPC one.
pipeline_t2i = StableGarmentPipeline.from_pretrained(base_model_path, vae=vae, torch_dtype=torch_dtype, variant="fp16").to(device=device)
# pipeline = StableDiffusionPipeline.from_pretrained("SG161222/Realistic_Vision_V4.0_noVAE", vae=vae, torch_dtype=torch_dtype, variant="fp16").to(device=device)
pipeline_t2i.scheduler = scheduler

# The try-on pipeline is not built yet: the construction code is parked in the
# string literal below, so `pipeline_tryon` stays None and tryon() cannot run
# until it is enabled.
pipeline_tryon = None
'''
# not ready
pretrained_model_path = "part_module_controlnet_imp2"
controlnet = ControlNetModel.from_pretrained(pretrained_model_path,subfolder="controlnet")
text_encoder = CLIPTextModel.from_pretrained(base_model_path, subfolder='text_encoder')
tokenizer = CLIPTokenizer.from_pretrained(base_model_path, subfolder='tokenizer')
pipeline_tryon = StableGarmentControlNetPipeline(
    vae,
    text_encoder, 
    tokenizer,
    pipeline_t2i.unet,
    controlnet,
    scheduler,
).to(device=device,dtype=torch_dtype)
'''


def prepare_controlnet_inputs(agn_mask_list,densepose_list):
    """Build the batched ControlNet conditioning tensor from masks and pose maps.

    Each mask is converted to single-channel ("L"), binarized at 128 and
    inverted (pixels >= 128 become 0.0, all others 1.0). Each DensePose image
    is scaled by 1/255. Mask and pose are concatenated along the last axis and
    then moved to channels-first layout.

    The input lists are left unmodified; new arrays are built for the result.

    Args:
        agn_mask_list: Mask images exposing ``convert("L")`` (e.g. PIL images),
            one per sample.
        densepose_list: Array-convertible pose images, paired with the masks by
            position (assumed to be (H, W, C) with the same H and W as the
            masks — TODO confirm against callers).

    Returns:
        A tensor stacking one (1 + C, H, W) entry per mask/pose pair.
    """
    inverted_masks = []
    for agn_mask_img in agn_mask_list:
        mask = np.array(agn_mask_img.convert("L"))
        mask = np.expand_dims(mask, axis=-1)  # (H, W) -> (H, W, 1)
        mask = (mask >= 128).astype(np.float32)  # 0 or 1
        inverted_masks.append(1. - mask)
    poses = [np.array(img) / 255. for img in densepose_list]
    controlnet_inputs = [
        # HWC -> CHW, mask channel first.
        torch.tensor(np.concatenate([mask, pose], axis=-1)).permute(2, 0, 1)
        for mask, pose in zip(inverted_masks, poses)
    ]
    return torch.stack(controlnet_inputs)

@spaces.GPU(enable_queue=True)
def tryon(prompt,init_image,garment_top,garment_down,):
    """Generate a try-on image of the selected person wearing the top garment.

    The person's auxiliary images are looked up in ``parse_dir`` by the base
    name of ``init_image``: ``<name>_agn.jpg``, ``<name>_mask.png`` and
    ``<name>_densepose.png``. All images are resized to (width, height).

    Args:
        prompt: Text prompt forwarded to the pipeline.
        init_image: File path of the selected person image.
        garment_top: File path of the top garment image.
        garment_down: Unused; accepted to keep the handler signature uniform.

    Returns:
        The first image produced by ``pipeline_tryon``.

    Raises:
        gr.Error: If the try-on pipeline is not loaded, or if the person or
            garment image is missing.
    """
    # pipeline_tryon is None until the try-on models are enabled; fail with a
    # readable message instead of "'NoneType' object is not callable".
    if pipeline_tryon is None:
        raise gr.Error("Try-on is not available yet: the try-on pipeline is not loaded.")
    if not init_image or not garment_top:
        raise gr.Error("Please select a model image and a top garment.")

    def _load_resized(path):
        # resize() returns a new, fully loaded image, so the file handle can be
        # released as soon as it has been produced.
        with Image.open(path) as img:
            return img.resize((width, height))

    basename = os.path.splitext(os.path.basename(init_image))[0]
    image_agn = _load_resized(opj(parse_dir, basename + "_agn.jpg"))
    image_agn_mask = _load_resized(opj(parse_dir, basename + "_mask.png"))
    densepose_image = _load_resized(opj(parse_dir, basename + "_densepose.png"))
    garment_top = _load_resized(garment_top)

    garment_images = [garment_top,]
    prompt = [prompt,]
    cloth_prompt = ["",]
    controlnet_condition = prepare_controlnet_inputs([image_agn_mask],[densepose_image])

    images = pipeline_tryon(prompt, negative_prompt="",cloth_prompt=cloth_prompt,
                  height=height,width=width,num_inference_steps=25,guidance_scale=1.5,eta=0.0,
                  controlnet_condition=controlnet_condition,reference_image=garment_images,
                  garment_encoder=garment_encoder,condition_extra=image_agn,
                  generator=None,).images
    return images[0]

@spaces.GPU(enable_queue=True)
def text2image(prompt,init_image,garment_top,garment_down,style_fidelity=1.):
    """Generate an image from a text prompt conditioned on the top garment.

    Args:
        prompt: Text prompt describing the desired image.
        init_image: Unused; accepted to keep the handler signature uniform.
        garment_top: File path of the top garment image.
        garment_down: Unused; accepted to keep the handler signature uniform.
        style_fidelity: Forwarded unchanged to ``pipeline_t2i``.

    Returns:
        The first image produced by ``pipeline_t2i``.
    """
    garment = Image.open(garment_top).resize((width, height))
    # NOTE(review): the garment is first forced to (width, height) and then
    # resized again and center-cropped back to (height, width); confirm that
    # this double resize (and the resulting crop) is intended.
    center_crop = transforms.CenterCrop((height, width))
    rescale = transforms.Resize(max(height, width))
    garment = center_crop(rescale(garment))

    result = pipeline_t2i(
        [prompt],
        negative_prompt=["nsfw, unsaturated, abnormal, unnatural, artifact"],
        cloth_prompt=[""],
        height=height,
        width=width,
        num_inference_steps=30,
        guidance_scale=4,
        num_images_per_prompt=1,
        style_fidelity=style_fidelity,
        garment_encoder=garment_encoder,
        garment_image=[garment],
    )
    return result.images[0]

# def text2image(prompt,init_image,garment_top,garment_down,):
#     return pipeline(prompt).images[0]

def infer(prompt,init_image,garment_top,garment_down,t2i_only,style_fidelity):
    """Dispatch the Run button to text-to-image or try-on generation.

    When ``t2i_only`` is truthy the request goes to ``text2image`` (which also
    receives ``style_fidelity``); otherwise it goes to ``tryon``.
    Returns whatever the selected generator returns.
    """
    if not t2i_only:
        return tryon(prompt, init_image, garment_top, garment_down)
    return text2image(prompt, init_image, garment_top, garment_down, style_fidelity)

# UI state remembered across mode switches:
#   init_state     - last person image path seen while it was displayed
#   prompt_state   - last non-empty prompt text
#   t2i_only_state - mirror of the "t2i with garment" checkbox, read by example_fn
init_state,prompt_state = None,""
t2i_only_state = True
def set_mode(t2i_only,person_condition,prompt):
    """Switch the UI between text-to-image mode and try-on mode.

    Remembers the current person image and prompt so they can be restored when
    the user switches back, then returns updated components.

    Args:
        t2i_only: New value of the mode checkbox (truthy = text-to-image).
        person_condition: Current value of the person image component.
        prompt: Current text of the prompt box.

    Returns:
        ``[image, textbox]`` updates. In text-to-image mode the person image is
        cleared and the prompt box is editable and restored; in try-on mode the
        remembered person image is shown and the prompt box is cleared and
        locked.
    """
    global init_state, prompt_state, t2i_only_state
    # Track the checkbox value itself rather than toggling, so the mirror
    # cannot drift out of sync with the widget.
    t2i_only_state = bool(t2i_only)
    # The newest non-empty value wins for both fields; an empty one (the
    # component was cleared by the previous switch) keeps the remembered value.
    init_state = person_condition or init_state
    prompt_state = prompt or prompt_state
    if t2i_only:
        return [gr.Image(sources='clipboard', type="filepath", label="model",value=None, interactive=False),
                gr.Textbox(placeholder="", label="prompt(for t2i)", value=prompt_state, interactive=True),
                ]
    else:
        return [gr.Image(sources='clipboard', type="filepath", label="model",value=init_state, interactive=False),
                gr.Textbox(placeholder="", label="prompt(for t2i)", value="", interactive=False),
                ]

def example_fn(inputs,):
    """Return the person-image component after an example is clicked.

    In text-to-image mode (``t2i_only_state`` truthy) the image stays empty;
    otherwise the clicked example ``inputs`` is shown.
    """
    shown = None if t2i_only_state else inputs
    return gr.Image(sources='clipboard', type="filepath", label="model", value=shown, interactive=False)

# Register the person-image directory with Gradio as a static path.
gr.set_static_paths(paths=["assets/images/model"])
# Asset directories, resolved relative to this file.
_app_dir = os.path.dirname(__file__)
model_dir = opj(_app_dir, "assets/images/model")
garment_dir = opj(_app_dir, "assets/images/garment")
parse_dir = opj(_app_dir, "assets/images/image_parse")

model = opj(model_dir, "13987_00.jpg")
# os.listdir returns entries in arbitrary order; sort so the example gallery
# is the same on every machine and every restart.
all_person = [opj(model_dir,fname) for fname in sorted(os.listdir(model_dir)) if fname.endswith(".jpg")]
# UI layout: three columns in one row - person picker, garment/prompt controls,
# and the generated result.
with gr.Blocks(css = ".output-image, .input-image, .image-preview {height: 400px !important} ", ) as gradio_app:
    gr.Markdown("# StableGarment")
    with gr.Row():
        # Column 1: person image plus a gallery of example persons.
        with gr.Column():
            init_image = gr.Image(sources='clipboard', type="filepath", label="model", value=None, interactive=False)
            # Examples feed a hidden image; clicking one runs example_fn, which
            # decides whether the picked person is shown in init_image.
            example = gr.Examples(inputs=gr.Image(visible=False), #init_image,
                                  examples_per_page=4,
                                  examples=all_person,
                                  run_on_click=True,
                                  outputs=init_image,
                                  fn=example_fn,)
        # Column 2: garment selection, prompt and run controls.
        with gr.Column():
            with gr.Row():
                # Sorted so the default garment does not depend on the
                # arbitrary order of os.listdir.
                images_top = [opj(garment_dir,fname) for fname in sorted(os.listdir(garment_dir)) if fname.endswith(".jpg")]
                # Fall back to an empty image when no garment files exist
                # instead of failing with IndexError while building the UI.
                default_top = images_top[0] if images_top else None
                garment_top = gr.Image(sources='upload', type="filepath", label="top garment",value=default_top) # ,interactive=False
                example_top = gr.Examples(inputs=garment_top,
                                            examples_per_page=4,
                                            examples=images_top)
                # Lower garment is hidden and has no examples.
                images_down = []
                garment_down = gr.Image(sources='upload', type="filepath", label="lower garment",interactive=False, visible=False)
                example_down = gr.Examples(inputs=garment_down,
                                            examples_per_page=4,
                                            examples=images_down)
            prompt = gr.Textbox(placeholder="", label="prompt(for t2i)",) # interactive=False
            with gr.Row():
                # The mode checkbox is locked to text-to-image (checked, not interactive).
                t2i_only = gr.Checkbox(label="t2i with garment", info="Only text and garment.", elem_id="t2i_switch", value=True, interactive=False,)
                run_button = gr.Button(value="Run")
                style_fidelity = gr.Slider(0, 1, value=1, label="fidelity(for t2i)") # , info=""
                t2i_only.change(fn=set_mode,inputs=[t2i_only,init_image,prompt],outputs=[init_image,prompt,])
        # Column 3: generated image.
        with gr.Column():
            gallery = gr.Image()
            run_button.click(fn=infer,
                            inputs=[
                                    prompt,
                                    init_image,
                                    garment_top,
                                    garment_down,
                                    t2i_only,
                                    style_fidelity,
                                    ],
                            outputs=[gallery],)
    
# Start the Gradio server only when this file is run as a script, not when it
# is imported.
if __name__ == "__main__":
    gradio_app.launch()