Spaces:

samir-fama
/

Image-Adapter-With-Face-ID

Runtime error

App Files Files Community

samir-fama committed on Jan 5

Commit

96891ca

•

1 Parent(s): fbbd1d6

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -54

app.py CHANGED Viewed

@@ -1,33 +1,54 @@
 from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
 from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
 from insightface.app import FaceAnalysis
 from insightface.utils import face_align
 from huggingface_hub import hf_hub_download
-import torch
-from PIL import Image
-import cv2
-import gradio as gr
-hf_hub_download(repo_id='h94/IP-Adapter-FaceID', filename='ip-adapter-faceid-plus_sd15.bin', local_dir='IP-Adapter-FaceID')
-hf_hub_download(repo_id='h94/IP-Adapter', filename='models/image_encoder/config.json', local_dir='IP-Adapter')
-hf_hub_download(repo_id='h94/IP-Adapter', filename='models/image_encoder/pytorch_model.bin', local_dir='IP-Adapter')
 def get_ip_model():
     base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
     vae_model_path = "stabilityai/sd-vae-ft-mse"
     image_encoder_path = "IP-Adapter/models/image_encoder"
     ip_ckpt = "IP-Adapter-FaceID/ip-adapter-faceid-plus_sd15.bin"
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
     print(f'Using device: {device}')
-    noise_scheduler = DDIMScheduler(num_train_timesteps=1000, beta_start=0.00085, beta_end=0.012,
-                                    beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False, steps_offset=1)
     vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch_dtype)
     pipe = StableDiffusionPipeline.from_pretrained(
         base_model_path,
@@ -42,58 +63,57 @@ def get_ip_model():
     return ip_model
-def generate_images(prompt, img_filepath, negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality, blurry",
-                    img_prompt_scale=0.5, num_inference_steps=30, seed=None, n_images=1):
     image = cv2.imread(img_filepath)
     faces = app.get(image)
     faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
-    face_image = face_align.norm_crop(image, landmark=faces[0].kps, image_size=224)
     images = ip_model.generate(
         prompt=prompt, negative_prompt=negative_prompt, face_image=face_image, faceid_embeds=faceid_embeds,
         num_samples=n_images, width=512, height=512, num_inference_steps=num_inference_steps, seed=seed,
-        scale=img_prompt_scale,
     )
     return [images[0], Image.fromarray(face_image[..., [2, 1, 0]])]
-if __name__ == "__main__":
-    ip_model = get_ip_model()
-    app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
-    app.prepare(ctx_id=0, det_size=(640, 640), det_thresh=0.2)
-    with gr.Blocks() as demo:
-        gr.Markdown(
     """
-    # ✨ Image Prompt Adapter With FaceID 🧙‍♂️
-    Unleash the magic of generating whimsical images with just an image and a sprinkle of text! Learn the secrets here: [Magic Link](https://huggingface.co/h94/IP-Adapter-FaceID)
-    🚀 This enchanting demo is designed to soar on GPU. While it can still dance on CPU, conjuring just one image might take up to 600 seconds—compared to the blink-of-an-eye magic on GPU! ✨
     """)
-        with gr.Row():
-            with gr.Column():
-                demo_inputs = []
-                demo_inputs.append(gr.Textbox(label='text prompt', value='A bold rider in a white horse'))
-                demo_inputs.append(gr.Image(type='filepath', label='image prompt'))
-                with gr.Accordion(label='Advanced options', open=False):
-                    demo_inputs.append(gr.Textbox(label='negative text prompt',
-                                                  value="deformed hands,  watermark, text, deformed fingers, blurred faces, irregular face, irrregular body shape, ugly eyes, deformed face, squint, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, poorly framed, extra limbs, disfigured, deformed, body out of frame, blurry, bad anatomy, blurred, watermark, grainy, signature, cut off, draft, ugly eyes, squint, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, poorly framed, extra limbs, disfigured, deformed, body out of frame, blurry, bad anatomy, blurred, watermark, grainy, signature, cut off, draft, disfigured, kitsch, ugly, oversaturated, grain, low-res, Deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, ugly, disgusting, poorly drawn, childish, mutilated, mangled, old, surreal, 2 heads, 2 faces"))
-                    demo_inputs.append(gr.Slider(maximum=1, minimum=0, value=0.5, step=0.05, label='image prompt scale'))
-                btn = gr.Button("Generate")
-            with gr.Column():
-                demo_outputs = []
-                demo_outputs.append(gr.Image(label='generated image'))
-                demo_outputs.append(gr.Image(label='detected face', height=224, width=224))
-        btn.click(generate_images, inputs=demo_inputs, outputs=demo_outputs)
-        sample_prompts = [
-            'A wizard casting spells in a coffee shop',
-            'A penguin teaching a yoga class',
-            'A robot composing a symphony',
-            'A giraffe participating in a slam poetry contest',
-            'A bold rider in a white horse'
         ]
-        gr.Examples(sample_prompts, inputs=demo_inputs[0], label='Sample prompts')
-    demo.launch(share=True, debug=True)

+import torch
 from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
+from PIL import Image
 from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
+import cv2
 from insightface.app import FaceAnalysis
 from insightface.utils import face_align
+import gradio as gr
 from huggingface_hub import hf_hub_download
+from datetime import datetime
+def download_models():
+    hf_hub_download(
+        repo_id='h94/IP-Adapter-FaceID',
+        filename='ip-adapter-faceid-plus_sd15.bin',
+        local_dir='IP-Adapter-FaceID')
+    hf_hub_download(
+        repo_id='h94/IP-Adapter',
+        filename='models/image_encoder/config.json',
+        local_dir='IP-Adapter')
+    hf_hub_download(
+        repo_id='h94/IP-Adapter',
+        filename='models/image_encoder/pytorch_model.bin',
+        local_dir='IP-Adapter')
 def get_ip_model():
+    download_models()
     base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
     vae_model_path = "stabilityai/sd-vae-ft-mse"
     image_encoder_path = "IP-Adapter/models/image_encoder"
     ip_ckpt = "IP-Adapter-FaceID/ip-adapter-faceid-plus_sd15.bin"
+    if torch.cuda.is_available():
+        device = 'cuda'
+        torch_dtype = torch.float16
+    else:
+        device = 'cpu'
+        torch_dtype = torch.float32
     print(f'Using device: {device}')
+    noise_scheduler = DDIMScheduler(
+        num_train_timesteps=1000,
+        beta_start=0.00085,
+        beta_end=0.012,
+        beta_schedule="scaled_linear",
+        clip_sample=False,
+        set_alpha_to_one=False,
+        steps_offset=1,
+    )
     vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch_dtype)
     pipe = StableDiffusionPipeline.from_pretrained(
         base_model_path,
     return ip_model
+# Module-level singletons: both objects are created once when the module is
+# loaded and are read as globals by generate_images() on every request.
+ip_model = get_ip_model()
+# Face detector/embedder; the provider list names CUDA first and CPU second.
+# NOTE(review): presumably onnxruntime falls back to CPU when CUDA is absent - confirm.
+app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+# NOTE(review): det_thresh=0.2 looks like a deliberately permissive detection
+# threshold, and ctx_id=0 presumably selects the first device - confirm.
+app.prepare(ctx_id=0, det_size=(640, 640), det_thresh=0.2)
+def generate_images(prompt, img_filepath,
+                    negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality, blurry",
+                    img_prompt_scale=0.5,
+                    num_inference_steps=30,
+                    seed=None, n_images=1):
+    print(f'{datetime.now().strftime("%Y/%m/%d %H:%M:%S")}: {prompt}')
     image = cv2.imread(img_filepath)
     faces = app.get(image)
     faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
+    face_image = face_align.norm_crop(image, landmark=faces[0].kps, image_size=224) # you can also segment the face
     images = ip_model.generate(
         prompt=prompt, negative_prompt=negative_prompt, face_image=face_image, faceid_embeds=faceid_embeds,
         num_samples=n_images, width=512, height=512, num_inference_steps=num_inference_steps, seed=seed,
+        scale=img_prompt_scale, # with scale=1 I get weird images
     )
     return [images[0], Image.fromarray(face_image[..., [2, 1, 0]])]
+with gr.Blocks() as demo:
+    gr.Markdown(
     """
+    # IP-Adapter-FaceID-plus
+    Generate images conditioned on a image prompt and a text prompt. Learn more here: https://huggingface.co/h94/IP-Adapter-FaceID
+    This demo is intended to use on GPU. It will work also on CPU but generating one image could take 900 seconds compared to a few seconds on GPU.
     """)
+    with gr.Row():
+        with gr.Column():
+            demo_inputs = []
+            demo_inputs.append(gr.Textbox(label='text prompt', value='Linkedin profile picture'))
+            demo_inputs.append(gr.Image(type='filepath', label='image prompt'))
+            with gr.Accordion(label='Advanced options', open=False):
+                demo_inputs.append(gr.Textbox(label='negative text prompt', value="monochrome, lowres, bad anatomy, worst quality, low quality, blurry"))
+                demo_inputs.append(gr.Slider(maximum=1, minimum=0, value=0.5, step=0.05, label='image prompt scale'))
+            btn = gr.Button("Generate")
+        with gr.Column():
+            demo_outputs = []
+            demo_outputs.append(gr.Image(label='generated image'))
+            demo_outputs.append(gr.Image(label='detected face', height=224, width=224))
+    btn.click(generate_images, inputs=demo_inputs, outputs=demo_outputs)
+    sample_prompts = [
+        'Linkedin profile picture',
+        'A singer on stage',
+        'A politician talking to the people',
+        'An astronaut in space',
         ]
+    gr.Examples(sample_prompts, inputs=demo_inputs[0], label='Sample prompts')
+demo.launch(share=True, debug=True)