Spaces:

MAPS-research
/

Diffusion-Cocktail

Running

App Files Files Community

Ricercar committed on Jan 6

Commit

10c79ab

•

1 Parent(s): 5857783

prepare for archive

Browse files

Files changed (4) hide show

app.py +56 -19
ditail/src/ditail_demo.py +3 -1
example/Cocktail.jpg +0 -0
example/Cocktail_impression.jpg +0 -0

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import gradio as gr
 import numpy as np
 import torch
@@ -31,12 +32,19 @@ LORA_TRIGGER_WORD = {
     'shinkai_makoto': ['shinkai makoto', 'kimi no na wa.', 'tenki no ko', 'kotonoha no niwa'],
 }
 class WebApp():
     def __init__(self, debug_mode=False):
         self.args_base = {
             "seed": 42,
-            "device": "cuda",
             "output_dir": "output_demo",
             "caption_model_name": "blip-large",
             "clip_model_name": "ViT-L-14/openai",
@@ -60,7 +68,6 @@ class WebApp():
         self.args_input = {} # for gr.components only
         self.gr_loras = list(LORA_TRIGGER_WORD.keys())
-        # fun fact: google analytics doesn't work in this space currently
         self.gtag = os.environ.get('GTag')
         self.ga_script = f"""
@@ -80,13 +87,13 @@ class WebApp():
         # self._preload_pipeline()
         self.debug_mode = debug_mode # turn off clip interrogator when debugging for faster building speed
-        if not self.debug_mode:
             self.init_interrogator()
     def init_interrogator(self):
         cache_path = os.environ.get('HF_HOME')
-        print(f"Intended cache dir: {cache_path}")
         config = Config()
         config.cache_path = cache_path
         config.clip_model_path = cache_path
@@ -96,7 +103,7 @@ class WebApp():
         self.ci.config.chunk_size = 2048 if self.ci.config.clip_model_name == "ViT-L-14/openai" else 1024
         self.ci.config.flavor_intermediate_count = 2048 if self.ci.config.clip_model_name == "ViT-L-14/openai" else 1024
-        print(f"HF cache dir: {file_utils.default_cache_path}")
     def _preload_pipeline(self):
         for model in BASE_MODEL.values():
@@ -114,10 +121,10 @@ class WebApp():
                     <h1 >Diffusion Cocktail 🍸: Fused Generation from Diffusion Models</h1>
                     <div style="display: flex; justify-content: center; align-items: center; text-align: center; margin: 20px; gap: 10px;">
                         <a class="flex-item" href="https://arxiv.org/abs/2312.08873" target="_blank">
-                            <img src="https://img.shields.io/badge/arXiv-paper-darkred.svg" alt="arXiv Paper">
                         </a>
                         <a class="flex-item" href="https://MAPS-research.github.io/Ditail" target="_blank">
-                            <img src="https://img.shields.io/badge/Project_Page-Diffusion_Cocktail-yellow.svg" alt="Project Page">
                         </a>
                         <a class="flex-item" href="https://github.com/MAPS-research/Ditail" target="_blank">
                             <img src="https://img.shields.io/badge/Github-Code-green.svg" alt="GitHub Code">
@@ -127,7 +134,20 @@ class WebApp():
                 </div>
                 """
                 )
     def get_image(self):
         self.args_input['img'] = gr.Image(label='content image', type='pil', show_share_button=False, elem_classes="input_image")
@@ -142,7 +162,7 @@ class WebApp():
     def _interrogate_image(self, image, generate_prompt):
-        if hasattr(self, 'ci') and generate_prompt:
             return self.ci.interrogate_fast(image).split(',')[0].replace('arafed', '')
         else:
             return ''
@@ -153,8 +173,8 @@ class WebApp():
     def get_lora(self, num_cols=3):
         self.args_input['lora'] = gr.State('none')
-        lora_gallery = gr.Gallery(label='target LoRA (optional)', columns=num_cols, value=[(os.path.join(self.args_base['lora_dir'], f"{lora}.jpeg"), lora) for lora in self.gr_loras], allow_preview=False, show_share_button=False, selected_index=0)
-        lora_gallery.select(self._update_lora_selection, inputs=[], outputs=[self.args_input['lora']])
     def _update_lora_selection(self, selected_state: gr.SelectData):
         return self.gr_loras[selected_state.index]
@@ -180,7 +200,7 @@ class WebApp():
     def run_ditail(self, *values):
         gr_args = self.args_base.copy()
-        print(self.args_input.keys())
         for k, v in zip(list(self.args_input.keys()), values):
             gr_args[k] = v
         # quick fix for example
@@ -195,9 +215,9 @@ class WebApp():
         seed_everything(gr_args['seed'])
         ditail = DitailDemo(gr_args)
-        metadata_to_show = ['inv_model', 'spl_model', 'lora', 'lora_scale', 'inv_steps', 'spl_steps', 'pos_prompt', 'alpha', 'neg_prompt', 'beta', 'omega']
         args_to_show = {}
-        for key in metadata_to_show:
             args_to_show[key] = gr_args[key]
         img = ditail.run_ditail()
@@ -207,8 +227,19 @@ class WebApp():
         return img, args_to_show
-    def run_example(self, img, prompt, inv_model, spl_model, lora):
-        return self.run_ditail(img, prompt, spl_model, gr.State(lora), inv_model)
     def show_credits(self):
         gr.Markdown(
@@ -224,6 +255,10 @@ class WebApp():
         with gr.Blocks(css='.input_image img {object-fit: contain;}', head=self.ga_script) as demo:
             self.title()
             with gr.Row():
                 self.get_image()
@@ -232,6 +267,8 @@ class WebApp():
                     self.get_base_model()
                     self.get_lora(num_cols=3)
                     submit_btn = gr.Button("Generate", variant='primary')
             with gr.Accordion("advanced options", open=False):
                 self.get_params()
@@ -250,12 +287,12 @@ class WebApp():
             with gr.Row():
                 cache_examples = not self.debug_mode
                 gr.Examples(
-                    examples=[[os.path.join(os.path.dirname(__file__), "example", "Lenna.png"), 'a woman called Lenna wearing a feathered hat', list(BASE_MODEL.keys())[1], list(BASE_MODEL.keys())[2], 'none']],
                     inputs=[self.args_input['img'], self.args_input['pos_prompt'], self.args_input['inv_model'], self.args_input['spl_model'], gr.Textbox(label='LoRA', visible=False), ],
                     fn = self.run_example,
                     outputs=[output_image, metadata],
                     run_on_click=True,
-                    cache_examples=cache_examples,
                 )
             self.show_credits()
@@ -264,7 +301,7 @@ class WebApp():
         return demo
-app = WebApp(debug_mode=False)
 demo = app.ui()

 import os
+import argparse
 import gradio as gr
 import numpy as np
 import torch
     'shinkai_makoto': ['shinkai makoto', 'kimi no na wa.', 'tenki no ko', 'kotonoha no niwa'],
 }
+METADATA_TO_SHOW = ['inv_model', 'spl_model', 'lora', 'lora_scale', 'inv_steps', 'spl_steps', 'pos_prompt', 'alpha', 'neg_prompt', 'beta', 'omega']
 class WebApp():
     def __init__(self, debug_mode=False):
+        if torch.cuda.is_available():
+            self.device = "cuda"
+        else:
+            self.device = "cpu"
         self.args_base = {
             "seed": 42,
+            "device": self.device,
             "output_dir": "output_demo",
             "caption_model_name": "blip-large",
             "clip_model_name": "ViT-L-14/openai",
         self.args_input = {} # for gr.components only
         self.gr_loras = list(LORA_TRIGGER_WORD.keys())
         self.gtag = os.environ.get('GTag')
         self.ga_script = f"""
         # self._preload_pipeline()
         self.debug_mode = debug_mode # turn off clip interrogator when debugging for faster building speed
+        if not self.debug_mode and self.device=="cuda":
             self.init_interrogator()
     def init_interrogator(self):
         cache_path = os.environ.get('HF_HOME')
+        # print(f"Intended cache dir: {cache_path}")
         config = Config()
         config.cache_path = cache_path
         config.clip_model_path = cache_path
         self.ci.config.chunk_size = 2048 if self.ci.config.clip_model_name == "ViT-L-14/openai" else 1024
         self.ci.config.flavor_intermediate_count = 2048 if self.ci.config.clip_model_name == "ViT-L-14/openai" else 1024
+        # print(f"HF cache dir: {file_utils.default_cache_path}")
     def _preload_pipeline(self):
         for model in BASE_MODEL.values():
                     <h1 >Diffusion Cocktail 🍸: Fused Generation from Diffusion Models</h1>
                     <div style="display: flex; justify-content: center; align-items: center; text-align: center; margin: 20px; gap: 10px;">
                         <a class="flex-item" href="https://arxiv.org/abs/2312.08873" target="_blank">
+                            <img src="https://img.shields.io/badge/arXiv-Paper-darkred.svg" alt="arXiv Paper">
                         </a>
                         <a class="flex-item" href="https://MAPS-research.github.io/Ditail" target="_blank">
+                            <img src="https://img.shields.io/badge/Website-Ditail-yellow.svg" alt="Project Page">
                         </a>
                         <a class="flex-item" href="https://github.com/MAPS-research/Ditail" target="_blank">
                             <img src="https://img.shields.io/badge/Github-Code-green.svg" alt="GitHub Code">
                 </div>
                 """
                 )
+    def device_requirements(self):
+        gr.Markdown(
+            """
+            <center>
+            <h2>
+            Attention: The demo doesn't work in this space running on CPU only. \
+            Please duplicate and upgrade to a private "T4 medium" GPU.
+            </h2>
+            </center>
+            """
+        )
+        gr.DuplicateButton(size='lg', scale=1, variant='primary')
     def get_image(self):
         self.args_input['img'] = gr.Image(label='content image', type='pil', show_share_button=False, elem_classes="input_image")
     def _interrogate_image(self, image, generate_prompt):
+        if hasattr(self, 'ci') and image is not None and generate_prompt:
             return self.ci.interrogate_fast(image).split(',')[0].replace('arafed', '')
         else:
             return ''
     def get_lora(self, num_cols=3):
         self.args_input['lora'] = gr.State('none')
+        self.lora_gallery = gr.Gallery(label='target LoRA (optional)', columns=num_cols, value=[(os.path.join(self.args_base['lora_dir'], f"{lora}.jpeg"), lora) for lora in self.gr_loras], allow_preview=False, show_share_button=False)
+        self.lora_gallery.select(self._update_lora_selection, inputs=[], outputs=[self.args_input['lora']])
     def _update_lora_selection(self, selected_state: gr.SelectData):
         return self.gr_loras[selected_state.index]
     def run_ditail(self, *values):
         gr_args = self.args_base.copy()
+        # print(self.args_input.keys())
         for k, v in zip(list(self.args_input.keys()), values):
             gr_args[k] = v
         # quick fix for example
         seed_everything(gr_args['seed'])
         ditail = DitailDemo(gr_args)
         args_to_show = {}
+        for key in METADATA_TO_SHOW:
             args_to_show[key] = gr_args[key]
         img = ditail.run_ditail()
         return img, args_to_show
+    # def run_example(self, img, prompt, inv_model, spl_model, lora):
+        # return self.run_ditail(img, prompt, spl_model, gr.State(lora), inv_model)
+    def run_example(self, *values):
+        gr_args = self.args_base.copy()
+        for k, v in zip(['img', 'pos_prompt', 'inv_model', 'spl_model', 'lora'], values):
+            gr_args[k] = v
+        args_to_show = {}
+        for key in METADATA_TO_SHOW:
+            args_to_show[key] = gr_args[key]
+        img = os.path.join(os.path.dirname(__file__), "example", "Cocktail_impression.jpg")
+        # self.lora_gallery.selected_index = self.gr_loras.index(gr_args['lora'])
+        return img, args_to_show
     def show_credits(self):
         gr.Markdown(
         with gr.Blocks(css='.input_image img {object-fit: contain;}', head=self.ga_script) as demo:
             self.title()
+            if self.device == "cpu":
+                self.device_requirements()
             with gr.Row():
                 self.get_image()
                     self.get_base_model()
                     self.get_lora(num_cols=3)
                     submit_btn = gr.Button("Generate", variant='primary')
+                    if self.device == 'cpu':
+                        submit_btn.variant='secondary'
             with gr.Accordion("advanced options", open=False):
                 self.get_params()
             with gr.Row():
                 cache_examples = not self.debug_mode
                 gr.Examples(
+                    examples=[[os.path.join(os.path.dirname(__file__), "example", "Cocktail.jpg"), 'a glass of a cocktail with a lime wedge on it', list(BASE_MODEL.keys())[1], list(BASE_MODEL.keys())[1], 'impressionism']],
                     inputs=[self.args_input['img'], self.args_input['pos_prompt'], self.args_input['inv_model'], self.args_input['spl_model'], gr.Textbox(label='LoRA', visible=False), ],
                     fn = self.run_example,
                     outputs=[output_image, metadata],
                     run_on_click=True,
+                    # cache_examples=cache_examples,
                 )
             self.show_credits()
         return demo
+app = WebApp(debug_mode=True)
 demo = app.ui()

ditail/src/ditail_demo.py CHANGED Viewed

@@ -72,11 +72,13 @@ class DitailDemo(nn.Module):
                 padding='max_length',
                 max_length=self.tokenizer.model_max_length
             )
     @torch.no_grad()
     def encode_image(self, image_pil):
         # image_pil = T.Resize(512)(img.convert('RGB'))
         image_pil = T.Resize(512)(image_pil)
         image = T.ToTensor()(image_pil).unsqueeze(0).to(self.device)
         with torch.autocast(device_type=self.device, dtype=torch.float32):
             image = 2 * image - 1

                 padding='max_length',
                 max_length=self.tokenizer.model_max_length
             )
     @torch.no_grad()
     def encode_image(self, image_pil):
         # image_pil = T.Resize(512)(img.convert('RGB'))
         image_pil = T.Resize(512)(image_pil)
+        width, height = image_pil.size
         image = T.ToTensor()(image_pil).unsqueeze(0).to(self.device)
         with torch.autocast(device_type=self.device, dtype=torch.float32):
             image = 2 * image - 1

example/Cocktail.jpg ADDED Viewed

example/Cocktail_impression.jpg ADDED Viewed