samir-fama
committed on
Commit
•
96891ca
1
Parent(s):
fbbd1d6
Update app.py
Browse files
app.py
CHANGED
@@ -1,33 +1,54 @@
|
|
|
|
1 |
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
|
|
|
2 |
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
|
|
|
3 |
from insightface.app import FaceAnalysis
|
4 |
from insightface.utils import face_align
|
5 |
-
|
6 |
from huggingface_hub import hf_hub_download
|
7 |
-
import
|
8 |
|
9 |
-
from PIL import Image
|
10 |
-
import cv2
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
hf_hub_download(repo_id='h94/IP-Adapter-FaceID', filename='ip-adapter-faceid-plus_sd15.bin', local_dir='IP-Adapter-FaceID')
|
15 |
-
hf_hub_download(repo_id='h94/IP-Adapter', filename='models/image_encoder/config.json', local_dir='IP-Adapter')
|
16 |
-
hf_hub_download(repo_id='h94/IP-Adapter', filename='models/image_encoder/pytorch_model.bin', local_dir='IP-Adapter')
|
17 |
|
18 |
def get_ip_model():
|
|
|
19 |
base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
|
20 |
vae_model_path = "stabilityai/sd-vae-ft-mse"
|
21 |
image_encoder_path = "IP-Adapter/models/image_encoder"
|
22 |
ip_ckpt = "IP-Adapter-FaceID/ip-adapter-faceid-plus_sd15.bin"
|
23 |
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
26 |
print(f'Using device: {device}')
|
27 |
|
28 |
-
noise_scheduler = DDIMScheduler(
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch_dtype)
|
32 |
pipe = StableDiffusionPipeline.from_pretrained(
|
33 |
base_model_path,
|
@@ -42,58 +63,57 @@ def get_ip_model():
|
|
42 |
return ip_model
|
43 |
|
44 |
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
image = cv2.imread(img_filepath)
|
48 |
faces = app.get(image)
|
49 |
|
50 |
faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
|
51 |
-
face_image = face_align.norm_crop(image, landmark=faces[0].kps, image_size=224)
|
52 |
images = ip_model.generate(
|
53 |
prompt=prompt, negative_prompt=negative_prompt, face_image=face_image, faceid_embeds=faceid_embeds,
|
54 |
num_samples=n_images, width=512, height=512, num_inference_steps=num_inference_steps, seed=seed,
|
55 |
-
scale=img_prompt_scale,
|
56 |
)
|
57 |
return [images[0], Image.fromarray(face_image[..., [2, 1, 0]])]
|
58 |
|
59 |
-
if __name__ == "__main__":
|
60 |
-
ip_model = get_ip_model()
|
61 |
-
app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
|
62 |
-
app.prepare(ctx_id=0, det_size=(640, 640), det_thresh=0.2)
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
gr.Markdown(
|
67 |
"""
|
68 |
-
#
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
🚀 This enchanting demo is designed to soar on GPU. While it can still dance on CPU, conjuring just one image might take up to 600 seconds—compared to the blink-of-an-eye magic on GPU! ✨
|
73 |
""")
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
'A giraffe participating in a slam poetry contest',
|
95 |
-
'A bold rider in a white horse'
|
96 |
]
|
97 |
-
|
98 |
-
|
99 |
-
|
|
|
1 |
+
import torch
|
2 |
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
|
3 |
+
from PIL import Image
|
4 |
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
|
5 |
+
import cv2
|
6 |
from insightface.app import FaceAnalysis
|
7 |
from insightface.utils import face_align
|
8 |
+
import gradio as gr
|
9 |
from huggingface_hub import hf_hub_download
|
10 |
+
from datetime import datetime
|
11 |
|
|
|
|
|
12 |
|
13 |
+
def download_models():
    """Download the model files that ``get_ip_model`` loads from local paths.

    Fetches the IP-Adapter-FaceID-plus checkpoint and the two image-encoder
    files from the Hugging Face Hub, each into a local directory named after
    the repository it comes from.
    """
    # One (repo_id, filename, local_dir) entry per file, in download order.
    required_files = (
        ('h94/IP-Adapter-FaceID', 'ip-adapter-faceid-plus_sd15.bin', 'IP-Adapter-FaceID'),
        ('h94/IP-Adapter', 'models/image_encoder/config.json', 'IP-Adapter'),
        ('h94/IP-Adapter', 'models/image_encoder/pytorch_model.bin', 'IP-Adapter'),
    )
    for repo_id, filename, local_dir in required_files:
        hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir)
|
26 |
|
|
|
|
|
|
|
27 |
|
28 |
def get_ip_model():
|
29 |
+
download_models()
|
30 |
base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
|
31 |
vae_model_path = "stabilityai/sd-vae-ft-mse"
|
32 |
image_encoder_path = "IP-Adapter/models/image_encoder"
|
33 |
ip_ckpt = "IP-Adapter-FaceID/ip-adapter-faceid-plus_sd15.bin"
|
34 |
|
35 |
+
if torch.cuda.is_available():
|
36 |
+
device = 'cuda'
|
37 |
+
torch_dtype = torch.float16
|
38 |
+
else:
|
39 |
+
device = 'cpu'
|
40 |
+
torch_dtype = torch.float32
|
41 |
print(f'Using device: {device}')
|
42 |
|
43 |
+
noise_scheduler = DDIMScheduler(
|
44 |
+
num_train_timesteps=1000,
|
45 |
+
beta_start=0.00085,
|
46 |
+
beta_end=0.012,
|
47 |
+
beta_schedule="scaled_linear",
|
48 |
+
clip_sample=False,
|
49 |
+
set_alpha_to_one=False,
|
50 |
+
steps_offset=1,
|
51 |
+
)
|
52 |
vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch_dtype)
|
53 |
pipe = StableDiffusionPipeline.from_pretrained(
|
54 |
base_model_path,
|
|
|
63 |
return ip_model
|
64 |
|
65 |
|
66 |
+
# Build the IP-Adapter pipeline once at import time; generate_images reads
# this module-level object on every call.
ip_model = get_ip_model()

# insightface analyser used by generate_images for face detection and
# identity embeddings. The CUDA provider is listed ahead of the CPU one.
app = FaceAnalysis(
    name="buffalo_l",
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)
app.prepare(ctx_id=0, det_size=(640, 640), det_thresh=0.2)
|
69 |
+
|
70 |
+
def generate_images(prompt, img_filepath,
                    negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality, blurry",
                    img_prompt_scale=0.5,
                    num_inference_steps=30,
                    seed=None, n_images=1):
    """Generate an image of the person in ``img_filepath`` guided by ``prompt``.

    Args:
        prompt: Text prompt forwarded to ``ip_model.generate``.
        img_filepath: Path of the reference image. The first face returned by
            ``app.get`` is the one used.
        negative_prompt: Negative text prompt forwarded to ``ip_model.generate``.
        img_prompt_scale: Forwarded to ``ip_model.generate`` as ``scale``.
        num_inference_steps: Forwarded to ``ip_model.generate``.
        seed: Forwarded unchanged to ``ip_model.generate``.
        n_images: Forwarded as ``num_samples``; only the first generated
            image is returned.

    Returns:
        A two-item list: the first generated image, and the 224x224 aligned
        face crop as a ``PIL.Image`` with its channel order reversed
        (OpenCV loads BGR, so the result is RGB).

    Raises:
        gr.Error: If no image was supplied, the file cannot be read, or no
            face is detected in it. Gradio shows the message in the UI.
    """
    print(f'{datetime.now().strftime("%Y/%m/%d %H:%M:%S")}: {prompt}')

    # The image input is optional in the UI, so the path may be None/empty.
    if not img_filepath:
        raise gr.Error('Please provide an image prompt containing a face.')

    image = cv2.imread(img_filepath)
    # cv2.imread does not raise on a missing/corrupt file; it returns None.
    if image is None:
        raise gr.Error('The image prompt could not be read.')

    faces = app.get(image)
    # Without this guard, faces[0] raises a bare IndexError on faceless images.
    if not faces:
        raise gr.Error('No face was detected in the image prompt.')

    faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
    face_image = face_align.norm_crop(image, landmark=faces[0].kps, image_size=224)  # you can also segment the face
    images = ip_model.generate(
        prompt=prompt, negative_prompt=negative_prompt, face_image=face_image, faceid_embeds=faceid_embeds,
        num_samples=n_images, width=512, height=512, num_inference_steps=num_inference_steps, seed=seed,
        scale=img_prompt_scale,  # with scale=1 I get weird images
    )
    return [images[0], Image.fromarray(face_image[..., [2, 1, 0]])]
|
87 |
|
|
|
|
|
|
|
|
|
88 |
|
89 |
+
# NOTE(review): the nesting below is reconstructed from syntax because the
# source view carries no indentation — confirm the layout against the real
# app.py. Component creation order is kept exactly as in the original.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # IP-Adapter-FaceID-plus
        Generate images conditioned on a image prompt and a text prompt. Learn more here: https://huggingface.co/h94/IP-Adapter-FaceID
        This demo is intended to use on GPU. It will work also on CPU but generating one image could take 900 seconds compared to a few seconds on GPU.
        """)
    with gr.Row():
        # Left column: inputs, in the positional order generate_images expects.
        with gr.Column():
            demo_inputs = [
                gr.Textbox(label='text prompt', value='Linkedin profile picture'),
                gr.Image(type='filepath', label='image prompt'),
            ]
            with gr.Accordion(label='Advanced options', open=False):
                demo_inputs += [
                    gr.Textbox(label='negative text prompt', value="monochrome, lowres, bad anatomy, worst quality, low quality, blurry"),
                    gr.Slider(maximum=1, minimum=0, value=0.5, step=0.05, label='image prompt scale'),
                ]
            btn = gr.Button("Generate")

        # Right column: the two values returned by generate_images.
        with gr.Column():
            demo_outputs = [
                gr.Image(label='generated image'),
                gr.Image(label='detected face', height=224, width=224),
            ]
    btn.click(generate_images, inputs=demo_inputs, outputs=demo_outputs)
    sample_prompts = [
        'Linkedin profile picture',
        'A singer on stage',
        'A politician talking to the people',
        'An astronaut in space',
    ]
    # Clicking an example fills only the text-prompt box.
    gr.Examples(sample_prompts, inputs=demo_inputs[0], label='Sample prompts')

demo.launch(share=True, debug=True)
|