Spaces:

rynmurdock
/

generative_recsys

Sleeping

App Files Community

rynmurdock

multimodalart HF staff committed on Apr 11

Commit

db551d5

•

1 Parent(s): 178e606

Performance PR (#2)

Browse files

- Performance PR (f33c43f609f59f8722b5928f0535007a9157da38)
- Disable SC (e6d1b5454f215a7280081510188907d11646de37)

Co-authored-by: Apolinário from multimodal AI art <multimodalart@users.noreply.huggingface.co>

Files changed (2) hide show

app.py +46 -22
patch_sdxl.py +4 -30

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from sklearn.svm import LinearSVC
 from sklearn import preprocessing
 import pandas as pd
-from diffusers import LCMScheduler
 from diffusers.models import ImageProjection
 from patch_sdxl import SDEmb
 import torch
@@ -22,6 +22,9 @@ from PIL import Image
 import requests
 from io import BytesIO, StringIO
 prompt_list = [p for p in list(set(
                 pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
@@ -29,12 +32,16 @@ start_time = time.time()
 ####################### Setup Model
 model_id = "stabilityai/stable-diffusion-xl-base-1.0"
-lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
-pipe = SDEmb.from_pretrained(model_id, variant="fp16", low_cpu_mem_usage=True, device_map="auto")
-pipe.load_lora_weights(lcm_lora_id)
-pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-pipe.to(device='cuda', dtype=torch.float16)
 pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
 output_hidden_state = False
 #######################
@@ -53,7 +60,7 @@ def predict(
             ip_adapter_emb=im_emb.to('cuda'),
             height=1024,
             width=1024,
-            num_inference_steps=8,
             guidance_scale=0,
             ).images[0]
         im_emb, _ = pipe.encode_image(
@@ -61,12 +68,6 @@ def predict(
             )
         return image, im_emb.to(DEVICE)
 # TODO add to state instead of shared across all
 glob_idx = 0
@@ -133,9 +134,9 @@ def next_image(embs, ys, calibrate_prompts):
 def start(_, embs, ys, calibrate_prompts):
     image, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
     return [
-            gr.Button(value='Like', interactive=True),
-            gr.Button(value='Neither', interactive=True),
-            gr.Button(value='Dislike', interactive=True),
             gr.Button(value='Start', interactive=False),
             image,
             embs,
@@ -157,9 +158,32 @@ def choose(choice, embs, ys, calibrate_prompts):
     img, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
     return img, embs, ys, calibrate_prompts
-css = ".gradio-container{max-width: 700px !important}"
-print(css)
-with gr.Blocks(css=css) as demo:
     embs = gr.State([])
     ys = gr.State([])
     calibrate_prompts = gr.State([
@@ -177,9 +201,9 @@ with gr.Blocks(css=css) as demo:
     with gr.Row(elem_id='output-image'):
         img = gr.Image(interactive=False, elem_id='output-image',width=700)
     with gr.Row(equal_height=True):
-        b3 = gr.Button(value='Dislike', interactive=False,)
-        b2 = gr.Button(value='Neither', interactive=False,)
-        b1 = gr.Button(value='Like', interactive=False,)
         b1.click(
         choose,
         [b1, embs, ys, calibrate_prompts],

 from sklearn import preprocessing
 import pandas as pd
+from diffusers import LCMScheduler, AutoencoderTiny, EulerDiscreteScheduler, UNet2DConditionModel
 from diffusers.models import ImageProjection
 from patch_sdxl import SDEmb
 import torch
 import requests
 from io import BytesIO, StringIO
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file
 prompt_list = [p for p in list(set(
                 pd.read_csv('./twitter_prompts.csv').iloc[:, 1].tolist())) if type(p) == str]
 ####################### Setup Model
 model_id = "stabilityai/stable-diffusion-xl-base-1.0"
+sdxl_lightening = "ByteDance/SDXL-Lightning"
+ckpt = "sdxl_lightning_2step_unet.safetensors"
+unet = UNet2DConditionModel.from_config(model_id, subfolder="unet").to("cuda", torch.float16)
+unet.load_state_dict(load_file(hf_hub_download(sdxl_lightening, ckpt), device="cuda"))
+pipe = SDEmb.from_pretrained(model_id, unet=unet, torch_dtype=torch.float16, variant="fp16").to("cuda")
+pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
+pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
+pipe.to(device='cuda')
 pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
 output_hidden_state = False
 #######################
             ip_adapter_emb=im_emb.to('cuda'),
             height=1024,
             width=1024,
+            num_inference_steps=2,
             guidance_scale=0,
             ).images[0]
         im_emb, _ = pipe.encode_image(
             )
         return image, im_emb.to(DEVICE)
 # TODO add to state instead of shared across all
 glob_idx = 0
 def start(_, embs, ys, calibrate_prompts):
     image, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
     return [
+            gr.Button(value='Like (L)', interactive=True),
+            gr.Button(value='Neither (Space)', interactive=True),
+            gr.Button(value='Dislike (A)', interactive=True),
             gr.Button(value='Start', interactive=False),
             image,
             embs,
     img, embs, ys, calibrate_prompts = next_image(embs, ys, calibrate_prompts)
     return img, embs, ys, calibrate_prompts
+css = '''.gradio-container{max-width: 700px !important}
+#description{text-align: center}
+#description h1{display: block}
+#description p{margin-top: 0}
+'''
+js = '''
+<script>
+document.addEventListener('keydown', function(event) {
+    if (event.key === 'a' || event.key === 'A') {
+        // Trigger click on 'dislike' if 'A' is pressed
+        document.getElementById('dislike').click();
+    } else if (event.key === ' ' || event.keyCode === 32) {
+        // Trigger click on 'neither' if Spacebar is pressed
+        document.getElementById('neither').click();
+    } else if (event.key === 'l' || event.key === 'L') {
+        // Trigger click on 'like' if 'L' is pressed
+        document.getElementById('like').click();
+    }
+});
+</script>
+'''
+with gr.Blocks(css=css, head=js) as demo:
+    gr.Markdown('''# Generative Recommenders
+    Explore the latent space without text prompts, based on your preferences. [Learn more on the blog](https://rynmurdock.github.io/posts/2024/3/generative_recomenders/)
+    ''', elem_id="description")
     embs = gr.State([])
     ys = gr.State([])
     calibrate_prompts = gr.State([
     with gr.Row(elem_id='output-image'):
         img = gr.Image(interactive=False, elem_id='output-image',width=700)
     with gr.Row(equal_height=True):
+        b3 = gr.Button(value='Dislike (A)', interactive=False, elem_id="dislike")
+        b2 = gr.Button(value='Neither (Space)', interactive=False, elem_id="neither")
+        b1 = gr.Button(value='Like (L)', interactive=False, elem_id="like")
         b1.click(
         choose,
         [b1, embs, ys, calibrate_prompts],

patch_sdxl.py CHANGED Viewed

@@ -1,6 +1,3 @@
 import inspect
 from typing import Any, Callable, Dict, List, Optional, Union, Tuple
@@ -29,7 +26,6 @@ from diffusers.pipelines.stable_diffusion_xl import StableDiffusionXLPipelineOut
-from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
 from transformers import CLIPFeatureExtractor
 import numpy as np
 import torch
@@ -40,27 +36,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 torch_device = device
 torch_dtype = torch.float16
-safety_checker = StableDiffusionSafetyChecker.from_pretrained(
-    "CompVis/stable-diffusion-safety-checker"
-).to(device)
-feature_extractor = CLIPFeatureExtractor.from_pretrained(
-    "openai/clip-vit-base-patch32"
-)
-def check_nsfw_images(
-    images: list[Image.Image],
-) -> list[bool]:
-    safety_checker_input = feature_extractor(images, return_tensors="pt").to(device)
-    images_np = [np.array(img) for img in images]
-    _, has_nsfw_concepts = safety_checker(
-        images=images_np,
-        clip_input=safety_checker_input.pixel_values.to(torch_device),
-    )
-    return has_nsfw_concepts
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -569,12 +544,11 @@ class SDEmb(StableDiffusionXLPipeline):
             # apply watermark if available
             if self.watermark is not None:
                 image = self.watermark.apply_watermark(image)
             image = self.image_processor.postprocess(image, output_type=output_type)
-        maybe_nsfw = any(check_nsfw_images(image))
-        if maybe_nsfw:
-            print('This image could be NSFW so we return a blank image.')
-            return StableDiffusionXLPipelineOutput(images=[Image.new('RGB', (1024, 1024))])
         # Offload all models
         self.maybe_free_model_hooks()

 import inspect
 from typing import Any, Callable, Dict, List, Optional, Union, Tuple
 from transformers import CLIPFeatureExtractor
 import numpy as np
 import torch
 torch_device = device
 torch_dtype = torch.float16
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
             # apply watermark if available
             if self.watermark is not None:
                 image = self.watermark.apply_watermark(image)
             image = self.image_processor.postprocess(image, output_type=output_type)
+        #maybe_nsfw = any(check_nsfw_images(image))
+        #if maybe_nsfw:
+        #    print('This image could be NSFW so we return a blank image.')
+        #    return StableDiffusionXLPipelineOutput(images=[Image.new('RGB', (1024, 1024))])
         # Offload all models
         self.maybe_free_model_hooks()