samir-fama committed on
Commit
96891ca
1 Parent(s): fbbd1d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -54
app.py CHANGED
@@ -1,33 +1,54 @@
 
1
  from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
 
2
  from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
 
3
  from insightface.app import FaceAnalysis
4
  from insightface.utils import face_align
5
-
6
  from huggingface_hub import hf_hub_download
7
- import torch
8
 
9
- from PIL import Image
10
- import cv2
11
 
12
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- hf_hub_download(repo_id='h94/IP-Adapter-FaceID', filename='ip-adapter-faceid-plus_sd15.bin', local_dir='IP-Adapter-FaceID')
15
- hf_hub_download(repo_id='h94/IP-Adapter', filename='models/image_encoder/config.json', local_dir='IP-Adapter')
16
- hf_hub_download(repo_id='h94/IP-Adapter', filename='models/image_encoder/pytorch_model.bin', local_dir='IP-Adapter')
17
 
18
  def get_ip_model():
 
19
  base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
20
  vae_model_path = "stabilityai/sd-vae-ft-mse"
21
  image_encoder_path = "IP-Adapter/models/image_encoder"
22
  ip_ckpt = "IP-Adapter-FaceID/ip-adapter-faceid-plus_sd15.bin"
23
 
24
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
25
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
 
 
 
26
  print(f'Using device: {device}')
27
 
28
- noise_scheduler = DDIMScheduler(num_train_timesteps=1000, beta_start=0.00085, beta_end=0.012,
29
- beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False, steps_offset=1)
30
-
 
 
 
 
 
 
31
  vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch_dtype)
32
  pipe = StableDiffusionPipeline.from_pretrained(
33
  base_model_path,
@@ -42,58 +63,57 @@ def get_ip_model():
42
  return ip_model
43
 
44
 
45
- def generate_images(prompt, img_filepath, negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality, blurry",
46
- img_prompt_scale=0.5, num_inference_steps=30, seed=None, n_images=1):
 
 
 
 
 
 
 
 
47
  image = cv2.imread(img_filepath)
48
  faces = app.get(image)
49
 
50
  faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
51
- face_image = face_align.norm_crop(image, landmark=faces[0].kps, image_size=224)
52
  images = ip_model.generate(
53
  prompt=prompt, negative_prompt=negative_prompt, face_image=face_image, faceid_embeds=faceid_embeds,
54
  num_samples=n_images, width=512, height=512, num_inference_steps=num_inference_steps, seed=seed,
55
- scale=img_prompt_scale,
56
  )
57
  return [images[0], Image.fromarray(face_image[..., [2, 1, 0]])]
58
 
59
- if __name__ == "__main__":
60
- ip_model = get_ip_model()
61
- app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
62
- app.prepare(ctx_id=0, det_size=(640, 640), det_thresh=0.2)
63
 
64
-
65
- with gr.Blocks() as demo:
66
- gr.Markdown(
67
  """
68
- # ✨ Image Prompt Adapter With FaceID 🧙‍♂️
69
-
70
- Unleash the magic of generating whimsical images with just an image and a sprinkle of text! Learn the secrets here: [Magic Link](https://huggingface.co/h94/IP-Adapter-FaceID)
71
-
72
- 🚀 This enchanting demo is designed to soar on GPU. While it can still dance on CPU, conjuring just one image might take up to 600 seconds—compared to the blink-of-an-eye magic on GPU! ✨
73
  """)
74
- with gr.Row():
75
- with gr.Column():
76
- demo_inputs = []
77
- demo_inputs.append(gr.Textbox(label='text prompt', value='A bold rider in a white horse'))
78
- demo_inputs.append(gr.Image(type='filepath', label='image prompt'))
79
- with gr.Accordion(label='Advanced options', open=False):
80
- demo_inputs.append(gr.Textbox(label='negative text prompt',
81
- value="deformed hands, watermark, text, deformed fingers, blurred faces, irregular face, irrregular body shape, ugly eyes, deformed face, squint, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, poorly framed, extra limbs, disfigured, deformed, body out of frame, blurry, bad anatomy, blurred, watermark, grainy, signature, cut off, draft, ugly eyes, squint, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, poorly framed, extra limbs, disfigured, deformed, body out of frame, blurry, bad anatomy, blurred, watermark, grainy, signature, cut off, draft, disfigured, kitsch, ugly, oversaturated, grain, low-res, Deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, ugly, disgusting, poorly drawn, childish, mutilated, mangled, old, surreal, 2 heads, 2 faces"))
82
- demo_inputs.append(gr.Slider(maximum=1, minimum=0, value=0.5, step=0.05, label='image prompt scale'))
83
- btn = gr.Button("Generate")
84
-
85
- with gr.Column():
86
- demo_outputs = []
87
- demo_outputs.append(gr.Image(label='generated image'))
88
- demo_outputs.append(gr.Image(label='detected face', height=224, width=224))
89
- btn.click(generate_images, inputs=demo_inputs, outputs=demo_outputs)
90
- sample_prompts = [
91
- 'A wizard casting spells in a coffee shop',
92
- 'A penguin teaching a yoga class',
93
- 'A robot composing a symphony',
94
- 'A giraffe participating in a slam poetry contest',
95
- 'A bold rider in a white horse'
96
  ]
97
- gr.Examples(sample_prompts, inputs=demo_inputs[0], label='Sample prompts')
98
-
99
- demo.launch(share=True, debug=True)
 
1
+ import torch
2
  from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
3
+ from PIL import Image
4
  from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
5
+ import cv2
6
  from insightface.app import FaceAnalysis
7
  from insightface.utils import face_align
8
+ import gradio as gr
9
  from huggingface_hub import hf_hub_download
10
+ from datetime import datetime
11
 
 
 
12
 
13
+ def download_models():
14
+ hf_hub_download(
15
+ repo_id='h94/IP-Adapter-FaceID',
16
+ filename='ip-adapter-faceid-plus_sd15.bin',
17
+ local_dir='IP-Adapter-FaceID')
18
+ hf_hub_download(
19
+ repo_id='h94/IP-Adapter',
20
+ filename='models/image_encoder/config.json',
21
+ local_dir='IP-Adapter')
22
+ hf_hub_download(
23
+ repo_id='h94/IP-Adapter',
24
+ filename='models/image_encoder/pytorch_model.bin',
25
+ local_dir='IP-Adapter')
26
 
 
 
 
27
 
28
  def get_ip_model():
29
+ download_models()
30
  base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
31
  vae_model_path = "stabilityai/sd-vae-ft-mse"
32
  image_encoder_path = "IP-Adapter/models/image_encoder"
33
  ip_ckpt = "IP-Adapter-FaceID/ip-adapter-faceid-plus_sd15.bin"
34
 
35
+ if torch.cuda.is_available():
36
+ device = 'cuda'
37
+ torch_dtype = torch.float16
38
+ else:
39
+ device = 'cpu'
40
+ torch_dtype = torch.float32
41
  print(f'Using device: {device}')
42
 
43
+ noise_scheduler = DDIMScheduler(
44
+ num_train_timesteps=1000,
45
+ beta_start=0.00085,
46
+ beta_end=0.012,
47
+ beta_schedule="scaled_linear",
48
+ clip_sample=False,
49
+ set_alpha_to_one=False,
50
+ steps_offset=1,
51
+ )
52
  vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch_dtype)
53
  pipe = StableDiffusionPipeline.from_pretrained(
54
  base_model_path,
 
63
  return ip_model
64
 
65
 
66
+ ip_model = get_ip_model()
67
+ app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
68
+ app.prepare(ctx_id=0, det_size=(640, 640), det_thresh=0.2)
69
+
70
+ def generate_images(prompt, img_filepath,
71
+ negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality, blurry",
72
+ img_prompt_scale=0.5,
73
+ num_inference_steps=30,
74
+ seed=None, n_images=1):
75
+ print(f'{datetime.now().strftime("%Y/%m/%d %H:%M:%S")}: {prompt}')
76
  image = cv2.imread(img_filepath)
77
  faces = app.get(image)
78
 
79
  faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
80
+ face_image = face_align.norm_crop(image, landmark=faces[0].kps, image_size=224) # you can also segment the face
81
  images = ip_model.generate(
82
  prompt=prompt, negative_prompt=negative_prompt, face_image=face_image, faceid_embeds=faceid_embeds,
83
  num_samples=n_images, width=512, height=512, num_inference_steps=num_inference_steps, seed=seed,
84
+ scale=img_prompt_scale, # with scale=1 I get weird images
85
  )
86
  return [images[0], Image.fromarray(face_image[..., [2, 1, 0]])]
87
 
 
 
 
 
88
 
89
+ with gr.Blocks() as demo:
90
+ gr.Markdown(
 
91
  """
92
+ # IP-Adapter-FaceID-plus
93
+ Generate images conditioned on a image prompt and a text prompt. Learn more here: https://huggingface.co/h94/IP-Adapter-FaceID
94
+ This demo is intended to use on GPU. It will work also on CPU but generating one image could take 900 seconds compared to a few seconds on GPU.
 
 
95
  """)
96
+ with gr.Row():
97
+ with gr.Column():
98
+ demo_inputs = []
99
+ demo_inputs.append(gr.Textbox(label='text prompt', value='Linkedin profile picture'))
100
+ demo_inputs.append(gr.Image(type='filepath', label='image prompt'))
101
+ with gr.Accordion(label='Advanced options', open=False):
102
+ demo_inputs.append(gr.Textbox(label='negative text prompt', value="monochrome, lowres, bad anatomy, worst quality, low quality, blurry"))
103
+ demo_inputs.append(gr.Slider(maximum=1, minimum=0, value=0.5, step=0.05, label='image prompt scale'))
104
+ btn = gr.Button("Generate")
105
+
106
+ with gr.Column():
107
+ demo_outputs = []
108
+ demo_outputs.append(gr.Image(label='generated image'))
109
+ demo_outputs.append(gr.Image(label='detected face', height=224, width=224))
110
+ btn.click(generate_images, inputs=demo_inputs, outputs=demo_outputs)
111
+ sample_prompts = [
112
+ 'Linkedin profile picture',
113
+ 'A singer on stage',
114
+ 'A politician talking to the people',
115
+ 'An astronaut in space',
 
 
116
  ]
117
+ gr.Examples(sample_prompts, inputs=demo_inputs[0], label='Sample prompts')
118
+
119
+ demo.launch(share=True, debug=True)