lixiang46 committed
Commit
08f2519
1 Parent(s): 88a3aee
Files changed (3)
  1. app.py +41 -16
  2. image/bird.png +0 -3
  3. image/dog.png +0 -3
app.py CHANGED
@@ -23,15 +23,21 @@ device = "cuda"
 ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
 ckpt_dir_depth = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Depth")
 ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")
+ckpt_dir_ipa = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-Plus")
 
 text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
 tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
 vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
 scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
 unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
+
 controlnet_depth = ControlNetModel.from_pretrained(f"{ckpt_dir_depth}", revision=None).half().to(device)
 controlnet_canny = ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None).half().to(device)
 
+image_encoder = CLIPVisionModelWithProjection.from_pretrained(f'{ckpt_dir_ipa}/weights/Kolors-IP-Adapter-Plus/image_encoder', ignore_mismatched_sizes=True).to(dtype=torch.float16, device=device)
+ip_img_size = 336
+clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size)
+
 pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
     vae=vae,
     controlnet = controlnet_depth,
@@ -52,6 +58,14 @@ pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
     force_zeros_for_empty_prompt=False
 )
 
+@spaces.GPU
+def load_ipa(pipe):
+    if hasattr(pipe.unet, 'encoder_hid_proj'):
+        pipe.unet.text_encoder_hid_proj = pipe.unet.encoder_hid_proj
+
+    pipe.load_ip_adapter(f'{ckpt_dir_ipa}/weights/Kolors-IP-Adapter-Plus', subfolder="", weight_name=["ip_adapter_plus_general.bin"])
+    return pipe
+
 @spaces.GPU
 def process_canny_condition(image, canny_threods=[100,200]):
     np_image = image.copy()
@@ -77,6 +91,7 @@ MAX_IMAGE_SIZE = 1024
 @spaces.GPU
 def infer_depth(prompt,
                 image = None,
+                ipa_img = None,
                 negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
                 seed = 397886929,
                 randomize_seed = False,
@@ -84,19 +99,22 @@ def infer_depth(prompt,
                 num_inference_steps = 50,
                 controlnet_conditioning_scale = 0.7,
                 control_guidance_end = 0.9,
-                strength = 1.0
+                strength = 1.0,
+                ip_scale = 0.5,
                 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
     init_image = resize_image(image, MAX_IMAGE_SIZE)
-    pipe = pipe_depth.to("cuda")
+    pipe = load_ipa(pipe_depth).to("cuda")
+    pipe.set_ip_adapter_scale([ip_scale])
     condi_img = process_depth_condition_midas(np.array(init_image), MAX_IMAGE_SIZE)
     image = pipe(
         prompt= prompt ,
         image = init_image,
         controlnet_conditioning_scale = controlnet_conditioning_scale,
         control_guidance_end = control_guidance_end,
+        ip_adapter_image=[ipa_img],
         strength= strength ,
         control_image = condi_img,
         negative_prompt= negative_prompt ,
@@ -110,6 +128,7 @@ def infer_depth(prompt,
 @spaces.GPU
 def infer_canny(prompt,
                 image = None,
+                ipa_img = None,
                 negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
                 seed = 397886929,
                 randomize_seed = False,
@@ -117,19 +136,22 @@ def infer_canny(prompt,
                 num_inference_steps = 50,
                 controlnet_conditioning_scale = 0.7,
                 control_guidance_end = 0.9,
-                strength = 1.0
+                strength = 1.0,
+                ip_scale = 0.5,
                 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
     init_image = resize_image(image, MAX_IMAGE_SIZE)
-    pipe = pipe_canny.to("cuda")
+    pipe = load_ipa(pipe_canny).to("cuda")
+    pipe.set_ip_adapter_scale([ip_scale])
     condi_img = process_canny_condition(np.array(init_image))
     image = pipe(
         prompt= prompt ,
         image = init_image,
         controlnet_conditioning_scale = controlnet_conditioning_scale,
         control_guidance_end = control_guidance_end,
+        ip_adapter_image=[ipa_img],
         strength= strength ,
         control_image = condi_img,
         negative_prompt= negative_prompt ,
@@ -141,17 +163,13 @@ def infer_canny(prompt,
     return [condi_img, image], seed
 
 canny_examples = [
-    ["一个漂亮的女孩,高品质,超清晰,色彩鲜艳,超高分辨率,最佳品质,8k,高清,4K",
-     "image/woman_1.png"],
-    ["全景,一只可爱的白色小狗坐在杯子里,看向镜头,动漫风格,3d渲染,辛烷值渲染",
-     "image/dog.png"]
+    ["一个红色头发的女孩,唯美风景,清新明亮,斑驳的光影,最好的质量,超细节,8K画质",
+     "image/woman_2.png", "image/2.png"],
 ]
 
 depth_examples = [
-    ["新海诚风格,丰富的色彩,穿着绿色衬衫的女人站在田野里,唯美风景,清新明亮,斑驳的光影,最好的质量,超细节,8K画质",
-     "image/woman_2.png"],
-    ["一只颜色鲜艳的小鸟,高品质,超清晰,色彩鲜艳,超高分辨率,最佳品质,8k,高清,4K",
-     "image/bird.png"]
+    ["一个漂亮的女孩,最好的质量,超细节,8K画质",
+     "image/1.png","image/woman_1.png"],
 ]
 
 css="""
@@ -239,6 +257,13 @@ with gr.Blocks(css=css) as Kolors:
                     step=0.1,
                     value=1.0,
                 )
+                ip_scale = gr.Slider(
+                    label="IP_Scale",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.1,
+                    value=0.5,
+                )
             with gr.Row():
                 canny_button = gr.Button("Canny", elem_id="button")
                 depth_button = gr.Button("Depth", elem_id="button")
@@ -251,7 +276,7 @@ with gr.Blocks(css=css) as Kolors:
         gr.Examples(
             fn = infer_canny,
             examples = canny_examples,
-            inputs = [prompt, image],
+            inputs = [prompt, image, ipa_image],
            outputs = [result, seed_used],
             label = "Canny"
         )
@@ -259,20 +284,20 @@ with gr.Blocks(css=css) as Kolors:
        gr.Examples(
            fn = infer_depth,
            examples = depth_examples,
-            inputs = [prompt, image],
+            inputs = [prompt, image, ipa_image],
            outputs = [result, seed_used],
            label = "Depth"
        )
 
    canny_button.click(
        fn = infer_canny,
-        inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
+        inputs = [prompt, image, ipa_image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength, ip_scale],
        outputs = [result, seed_used]
    )
 
    depth_button.click(
        fn = infer_depth,
-        inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
+        inputs = [prompt, image, ipa_image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength, ip_scale],
        outputs = [result, seed_used]
    )
 
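Taken together, the app.py changes do three things: download the Kwai-Kolors/Kolors-IP-Adapter-Plus weights, install them on each ControlNet pipeline through the new load_ipa() helper, and thread a reference image (ipa_img) plus an ip_scale value through both inference functions into the pipe() call. Below is a minimal sketch of that flow, not part of the commit, reusing the exact calls from the diff; run_ip_adapter is a hypothetical helper, and pipe and ckpt_dir_ipa are assumed to exist as built at the top of app.py.

# Sketch only: mirrors the commit's load_ipa() + inference flow.
# Assumes `pipe` is a StableDiffusionXLControlNetImg2ImgPipeline built as in
# app.py and `ckpt_dir_ipa` is the Kolors-IP-Adapter-Plus snapshot directory.
def run_ip_adapter(pipe, prompt, init_image, control_image, ref_image, ip_scale=0.5):
    # Kolors aliases the UNet's encoder_hid_proj before loading the adapter so
    # the IP-Adapter image projection can coexist with the ChatGLM text projection.
    if hasattr(pipe.unet, 'encoder_hid_proj'):
        pipe.unet.text_encoder_hid_proj = pipe.unet.encoder_hid_proj
    pipe.load_ip_adapter(f'{ckpt_dir_ipa}/weights/Kolors-IP-Adapter-Plus',
                         subfolder="", weight_name=["ip_adapter_plus_general.bin"])
    pipe.set_ip_adapter_scale([ip_scale])  # 0.0 ignores the reference, 1.0 follows it closely
    return pipe(prompt=prompt,
                image=init_image,
                control_image=control_image,
                ip_adapter_image=[ref_image]).images[0]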
image/bird.png DELETED

Git LFS Details

  • SHA256: e74821365819a2141455e85d5a1c4fa443167dc707e296059c6f4a9d3d93b2f5
  • Pointer size: 131 Bytes
  • Size of remote file: 612 kB
image/dog.png DELETED

Git LFS Details

  • SHA256: a48c9d517b9a9bd27f31c7fa7e6e4128e27e485168c566dc88db9ece60703338
  • Pointer size: 132 Bytes
  • Size of remote file: 1.48 MB
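On the UI side, the wiring follows the standard Gradio pattern: each new input component (the ipa_image picker and the IP_Scale slider) is appended to the click handler's inputs list in the same positional order as the parameters of infer_canny and infer_depth. A stripped-down illustration follows; the component names and layout are hypothetical stand-ins, not the commit's exact Blocks tree.

import gradio as gr

# Stand-in for infer_canny/infer_depth: positional parameters must line up
# with the `inputs` list passed to .click() below.
def infer(prompt, image, ipa_img, ip_scale=0.5):
    return [image], 42  # placeholder gallery + seed

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    image = gr.Image(label="Condition image", type="pil")
    ipa_image = gr.Image(label="IP-Adapter reference", type="pil")
    ip_scale = gr.Slider(label="IP_Scale", minimum=0.0, maximum=1.0, step=0.1, value=0.5)
    result = gr.Gallery(label="Result")
    seed_used = gr.Number(label="Seed used")
    run = gr.Button("Run")
    # Gradio matches inputs to fn's parameters by position, which is why the
    # commit inserts ipa_image right after image and appends ip_scale at the end.
    run.click(fn=infer, inputs=[prompt, image, ipa_image, ip_scale],
              outputs=[result, seed_used])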