Spaces:

shikunl
/

prismer

Sleeping

App Files Community

shikunl committed on Mar 12, 2023

Commit

0312353

•

1 Parent(s): b734d92

Test experts

Browse files

Files changed (2) hide show

app_caption.py +11 -15
prismer_model.py +30 -87

app_caption.py CHANGED Viewed

@@ -4,7 +4,6 @@ from __future__ import annotations
 import os
 import pathlib
 import gradio as gr
 from prismer_model import Model
@@ -16,9 +15,7 @@ def create_demo():
     with gr.Row():
         with gr.Column():
             image = gr.Image(label='Input', type='filepath')
-            model_name = gr.Dropdown(label='Model',
-                                     choices=['prismer_base'],
-                                     value='prismer_base')
             run_button = gr.Button('Run')
         with gr.Column(scale=1.5):
             caption = gr.Text(label='Caption')
@@ -32,23 +29,22 @@ def create_demo():
                 ocr = gr.Image(label='OCR Detection')
     inputs = [image, model_name]
-    outputs = [
-        caption,
-        depth,
-        edge,
-        normals,
-        segmentation,
-        object_detection,
-        ocr,
-    ]
     paths = sorted(pathlib.Path('prismer/images').glob('*'))
     examples = [[path.as_posix(), 'prismer_base'] for path in paths]
     gr.Examples(examples=examples,
                 inputs=inputs,
                 outputs=outputs,
-                fn=model.run_caption,
-                cache_examples=os.getenv('SYSTEM') == 'spaces')
     run_button.click(fn=model.run_caption, inputs=inputs, outputs=outputs)

 import os
 import pathlib
 import gradio as gr
 from prismer_model import Model
     with gr.Row():
         with gr.Column():
             image = gr.Image(label='Input', type='filepath')
+            model_name = gr.Dropdown(label='Model', choices=['prismer_base'], value='prismer_base')
             run_button = gr.Button('Run')
         with gr.Column(scale=1.5):
             caption = gr.Text(label='Caption')
                 ocr = gr.Image(label='OCR Detection')
     inputs = [image, model_name]
+    outputs = [caption, depth, edge, normals, segmentation, object_detection, ocr]
+    # paths = sorted(pathlib.Path('prismer/images').glob('*'))
+    # examples = [[path.as_posix(), 'prismer_base'] for path in paths]
+    # gr.Examples(examples=examples,
+    #             inputs=inputs,
+    #             outputs=outputs,
+    #             fn=model.run_caption,
+    #             cache_examples=os.getenv('SYSTEM') == 'spaces')
     paths = sorted(pathlib.Path('prismer/images').glob('*'))
     examples = [[path.as_posix(), 'prismer_base'] for path in paths]
     gr.Examples(examples=examples,
                 inputs=inputs,
                 outputs=outputs,
+                fn=model.run_caption)
     run_button.click(fn=model.run_caption, inputs=inputs, outputs=outputs)

prismer_model.py CHANGED Viewed

@@ -20,32 +20,22 @@ from model.prismer_caption import PrismerCaption
 def download_models() -> None:
     if not pathlib.Path('prismer/experts/expert_weights/').exists():
-        subprocess.run(shlex.split(
-            'python download_checkpoints.py --download_experts=True'),
-                       cwd='prismer')
     model_names = [
-        'vqa_prismer_base',
-        'vqa_prismer_large',
-        'vqa_prismerz_base',
-        'vqa_prismerz_large',
-        'caption_prismerz_base',
-        'caption_prismerz_large',
         'caption_prismer_base',
         'caption_prismer_large',
     ]
     for model_name in model_names:
         if pathlib.Path(f'prismer/logging/{model_name}').exists():
             continue
-        subprocess.run(shlex.split(
-            f'python download_checkpoints.py --download_models={model_name}'),
-                       cwd='prismer')
 def build_deformable_conv() -> None:
-    subprocess.run(
-        shlex.split('sh make.sh'),
-        cwd=
-        'prismer/experts/segmentation/mask2former/modeling/pixel_decoder/ops')
 def run_experts(image_path: str) -> tuple[str | None, ...]:
@@ -56,40 +46,18 @@ def run_experts(image_path: str) -> tuple[str | None, ...]:
     out_path = image_dir / 'image.jpg'
     cv2.imwrite(out_path.as_posix(), cv2.imread(image_path))
-    expert_names = [
-        'depth',
-        'edge',
-        'normal',
-        'objdet',
-        'ocrdet',
-        'segmentation',
-    ]
     for expert_name in expert_names:
         env = os.environ.copy()
         if 'PYTHONPATH' in env:
             env['PYTHONPATH'] = f'{submodule_dir.as_posix()}:{env["PYTHONPATH"]}'
         else:
             env['PYTHONPATH'] = submodule_dir.as_posix()
-        subprocess.run(
-            shlex.split(f'python experts/generate_{expert_name}.py'),
-            cwd='prismer',
-            env=env,
-            check=True)
-    keys = [
-        'depth',
-        'edge',
-        'normal',
-        'seg_coco',
-        'obj_detection',
-        'ocr_detection',
-    ]
-    results = [
-        pathlib.Path('prismer/helpers/labels') / key /
-        'helpers/images/image.png' for key in keys
-    ]
-    return tuple(path.as_posix() if path.exists() else None
-                 for path in results)
 class Model:
@@ -102,67 +70,42 @@ class Model:
     def set_model(self, exp_name: str) -> None:
         if exp_name == self.exp_name:
             return
         config = {
-            'dataset':
-            'demo',
-            'data_path':
-            'prismer/helpers',
-            'label_path':
-            'prismer/helpers/labels',
-            'experts': [
-                'depth',
-                'normal',
-                'seg_coco',
-                'edge',
-                'obj_detection',
-                'ocr_detection',
-            ],
-            'image_resolution':
-            480,
-            'prismer_model':
-            'prismer_base',
-            'freeze':
-            'freeze_vision',
-            'prefix':
-            'A picture of',
         }
         model = PrismerCaption(config)
-        state_dict = torch.load(
-            f'prismer/logging/caption_{exp_name}/pytorch_model.bin',
-            map_location='cuda:0')
         model.load_state_dict(state_dict)
         model.eval()
-        tokenizer = model.tokenizer
         self.config = config
         self.model = model
-        self.tokenizer = tokenizer
         self.exp_name = exp_name
     @torch.inference_mode()
     def run_caption_model(self, exp_name: str) -> str:
         self.set_model(exp_name)
         _, test_dataset = create_dataset('caption', self.config)
-        test_loader = create_loader(test_dataset,
-                                    batch_size=1,
-                                    num_workers=4,
-                                    train=False)
         experts, _ = next(iter(test_loader))
-        captions = self.model(experts,
-                              train=False,
-                              prefix=self.config['prefix'])
-        captions = self.tokenizer(captions,
-                                  max_length=30,
-                                  padding='max_length',
-                                  return_tensors='pt').input_ids
         caption = captions.to(experts['rgb'].device)[0]
         caption = self.tokenizer.decode(caption, skip_special_tokens=True)
         caption = caption.capitalize() + '.'
         return caption
-    def run_caption(self, image_path: str,
-                    model_name: str) -> tuple[str | None, ...]:
         out_paths = run_experts(image_path)
-        caption = self.run_caption_model(model_name)
-        return caption, *out_paths

 def download_models() -> None:
     if not pathlib.Path('prismer/experts/expert_weights/').exists():
+        subprocess.run(shlex.split('python download_checkpoints.py --download_experts=True'), cwd='prismer')
     model_names = [
+        # 'vqa_prismer_base',
+        # 'vqa_prismer_large',
         'caption_prismer_base',
         'caption_prismer_large',
     ]
     for model_name in model_names:
         if pathlib.Path(f'prismer/logging/{model_name}').exists():
             continue
+        subprocess.run(shlex.split(f'python download_checkpoints.py --download_models={model_name}'), cwd='prismer')
 def build_deformable_conv() -> None:
+    subprocess.run(shlex.split('sh make.sh'), cwd='prismer/experts/segmentation/mask2former/modeling/pixel_decoder/ops')
 def run_experts(image_path: str) -> tuple[str | None, ...]:
     out_path = image_dir / 'image.jpg'
     cv2.imwrite(out_path.as_posix(), cv2.imread(image_path))
+    expert_names = ['depth', 'edge', 'normal', 'objdet', 'ocrdet', 'segmentation']
     for expert_name in expert_names:
         env = os.environ.copy()
         if 'PYTHONPATH' in env:
             env['PYTHONPATH'] = f'{submodule_dir.as_posix()}:{env["PYTHONPATH"]}'
         else:
             env['PYTHONPATH'] = submodule_dir.as_posix()
+        subprocess.run(shlex.split(f'python experts/generate_{expert_name}.py'), cwd='prismer', env=env, check=True)
+    keys = ['depth', 'edge', 'normal', 'seg_coco', 'obj_detection', 'ocr_detection']
+    results = [pathlib.Path('prismer/helpers/labels') / key / 'helpers/images/image.png' for key in keys]
+    return tuple(path.as_posix() if path.exists() else None for path in results)
 class Model:
     def set_model(self, exp_name: str) -> None:
         if exp_name == self.exp_name:
             return
         config = {
+            'dataset': 'demo',
+            'data_path': 'prismer/helpers',
+            'label_path': 'prismer/helpers/labels',
+            'experts': ['depth', 'normal', 'seg_coco', 'edge', 'obj_detection', 'ocr_detection'],
+            'image_resolution': 480,
+            'prismer_model': 'prismer_base',
+            'freeze': 'freeze_vision',
+            'prefix': 'A picture of',
         }
         model = PrismerCaption(config)
+        state_dict = torch.load(f'prismer/logging/caption_{exp_name}/pytorch_model.bin', map_location='cuda:0')
         model.load_state_dict(state_dict)
         model.eval()
         self.config = config
         self.model = model
+        self.tokenizer = model.tokenizer
         self.exp_name = exp_name
     @torch.inference_mode()
     def run_caption_model(self, exp_name: str) -> str:
         self.set_model(exp_name)
         _, test_dataset = create_dataset('caption', self.config)
+        test_loader = create_loader(test_dataset, batch_size=1, num_workers=4, train=False)
         experts, _ = next(iter(test_loader))
+        captions = self.model(experts, train=False, prefix=self.config['prefix'])
+        captions = self.tokenizer(captions, max_length=30, padding='max_length', return_tensors='pt').input_ids
         caption = captions.to(experts['rgb'].device)[0]
         caption = self.tokenizer.decode(caption, skip_special_tokens=True)
         caption = caption.capitalize() + '.'
         return caption
+    def run_caption(self, image_path: str, model_name: str) -> tuple[str | None, ...]:
         out_paths = run_experts(image_path)
+        # caption = self.run_caption_model(model_name)
+        return None, *out_paths