Spaces:
Paused
Paused
added json output
Browse files
app.py
CHANGED
@@ -1,12 +1,17 @@
|
|
1 |
-
import os
|
|
|
2 |
os.system("pip install gradio==2.4.6")
|
3 |
import sys
|
4 |
import gradio as gr
|
5 |
|
6 |
-
os.system(
|
|
|
|
|
7 |
|
8 |
# clone and install Detic
|
9 |
-
os.system(
|
|
|
|
|
10 |
os.chdir("Detic")
|
11 |
|
12 |
# Install detectron2
|
@@ -16,6 +21,7 @@ import torch
|
|
16 |
# Setup detectron2 logger
|
17 |
import detectron2
|
18 |
from detectron2.utils.logger import setup_logger
|
|
|
19 |
setup_logger()
|
20 |
|
21 |
# import some common libraries
|
@@ -31,8 +37,8 @@ from detectron2.utils.visualizer import Visualizer
|
|
31 |
from detectron2.data import MetadataCatalog, DatasetCatalog
|
32 |
|
33 |
# Detic libraries
|
34 |
-
sys.path.insert(0,
|
35 |
-
sys.path.insert(0,
|
36 |
from centernet.config import add_centernet_config
|
37 |
from detic.config import add_detic_config
|
38 |
from detic.modeling.utils import reset_cls_test
|
@@ -43,31 +49,33 @@ from PIL import Image
|
|
43 |
cfg = get_cfg()
|
44 |
add_centernet_config(cfg)
|
45 |
add_detic_config(cfg)
|
46 |
-
cfg.MODEL.DEVICE=
|
47 |
cfg.merge_from_file("configs/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml")
|
48 |
-
cfg.MODEL.WEIGHTS =
|
49 |
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set threshold for this model
|
50 |
-
cfg.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_PATH =
|
51 |
-
cfg.MODEL.ROI_HEADS.ONE_CLASS_PER_PROPOSAL =
|
|
|
|
|
52 |
predictor = DefaultPredictor(cfg)
|
53 |
|
54 |
# Set up the model's vocabulary using built-in datasets
|
55 |
|
56 |
BUILDIN_CLASSIFIER = {
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
}
|
62 |
|
63 |
BUILDIN_METADATA_PATH = {
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
}
|
69 |
|
70 |
-
vocabulary =
|
71 |
metadata = MetadataCatalog.get(BUILDIN_METADATA_PATH[vocabulary])
|
72 |
classifier = BUILDIN_CLASSIFIER[vocabulary]
|
73 |
num_classes = len(metadata.thing_classes)
|
@@ -75,6 +83,7 @@ reset_cls_test(predictor.model, classifier, num_classes)
|
|
75 |
|
76 |
os.system("wget https://web.eecs.umich.edu/~fouhey/fun/desk/desk.jpg")
|
77 |
|
|
|
78 |
def inference(img):
|
79 |
|
80 |
im = cv2.imread(img)
|
@@ -83,17 +92,45 @@ def inference(img):
|
|
83 |
v = Visualizer(im[:, :, ::-1], metadata)
|
84 |
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
|
85 |
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
title = "Detic"
|
89 |
|
90 |
description = "Gradio demo for Detic: Detecting Twenty-thousand Classes using Image-level Supervision. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
|
91 |
|
92 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.02605' target='_blank'>Detecting Twenty-thousand Classes using Image-level Supervision</a> | <a href='https://github.com/facebookresearch/Detic' target='_blank'>Github Repo</a></p>"
|
93 |
|
94 |
-
examples = [[
|
95 |
-
gr.Interface(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
description=description,
|
97 |
article=article,
|
98 |
-
examples=examples
|
99 |
-
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
os.system("pip install gradio==2.4.6")
|
4 |
import sys
|
5 |
import gradio as gr
|
6 |
|
7 |
+
os.system(
|
8 |
+
"pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html"
|
9 |
+
)
|
10 |
|
11 |
# clone and install Detic
|
12 |
+
os.system(
|
13 |
+
"git clone https://github.com/facebookresearch/Detic.git --recurse-submodules"
|
14 |
+
)
|
15 |
os.chdir("Detic")
|
16 |
|
17 |
# Install detectron2
|
|
|
21 |
# Setup detectron2 logger
|
22 |
import detectron2
|
23 |
from detectron2.utils.logger import setup_logger
|
24 |
+
|
25 |
setup_logger()
|
26 |
|
27 |
# import some common libraries
|
|
|
37 |
from detectron2.data import MetadataCatalog, DatasetCatalog
|
38 |
|
39 |
# Detic libraries
|
40 |
+
sys.path.insert(0, "third_party/CenterNet2/projects/CenterNet2/")
|
41 |
+
sys.path.insert(0, "third_party/CenterNet2/")
|
42 |
from centernet.config import add_centernet_config
|
43 |
from detic.config import add_detic_config
|
44 |
from detic.modeling.utils import reset_cls_test
|
|
|
49 |
cfg = get_cfg()
|
50 |
add_centernet_config(cfg)
|
51 |
add_detic_config(cfg)
|
52 |
+
cfg.MODEL.DEVICE = "cpu"
|
53 |
cfg.merge_from_file("configs/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml")
|
54 |
+
cfg.MODEL.WEIGHTS = "https://dl.fbaipublicfiles.com/detic/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.pth"
|
55 |
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set threshold for this model
|
56 |
+
cfg.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_PATH = "rand"
|
57 |
+
cfg.MODEL.ROI_HEADS.ONE_CLASS_PER_PROPOSAL = (
|
58 |
+
True # For better visualization purpose. Set to False for all classes.
|
59 |
+
)
|
60 |
predictor = DefaultPredictor(cfg)
|
61 |
|
62 |
# Set up the model's vocabulary using built-in datasets
|
63 |
|
64 |
BUILDIN_CLASSIFIER = {
|
65 |
+
"lvis": "datasets/metadata/lvis_v1_clip_a+cname.npy",
|
66 |
+
"objects365": "datasets/metadata/o365_clip_a+cnamefix.npy",
|
67 |
+
"openimages": "datasets/metadata/oid_clip_a+cname.npy",
|
68 |
+
"coco": "datasets/metadata/coco_clip_a+cname.npy",
|
69 |
}
|
70 |
|
71 |
BUILDIN_METADATA_PATH = {
|
72 |
+
"lvis": "lvis_v1_val",
|
73 |
+
"objects365": "objects365_v2_val",
|
74 |
+
"openimages": "oid_val_expanded",
|
75 |
+
"coco": "coco_2017_val",
|
76 |
}
|
77 |
|
78 |
+
vocabulary = "lvis" # change to 'lvis', 'objects365', 'openimages', or 'coco'
|
79 |
metadata = MetadataCatalog.get(BUILDIN_METADATA_PATH[vocabulary])
|
80 |
classifier = BUILDIN_CLASSIFIER[vocabulary]
|
81 |
num_classes = len(metadata.thing_classes)
|
|
|
83 |
|
84 |
os.system("wget https://web.eecs.umich.edu/~fouhey/fun/desk/desk.jpg")
|
85 |
|
86 |
+
|
87 |
def inference(img):
|
88 |
|
89 |
im = cv2.imread(img)
|
|
|
92 |
v = Visualizer(im[:, :, ::-1], metadata)
|
93 |
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
|
94 |
|
95 |
+
detected_objects = []
|
96 |
+
box_locations = outputs["instances"].pred_boxes
|
97 |
+
box_loc_screen = box_locations.tensor.cpu().numpy()
|
98 |
+
|
99 |
+
for i, box_coord in enumerate(box_loc_screen):
|
100 |
+
x0, y0, x1, y1 = box_coord
|
101 |
+
width = x1 - x0
|
102 |
+
height = y1 - y0
|
103 |
+
predicted_label = metadata.thing_classes[outputs["instances"].pred_classes[i]]
|
104 |
+
detected_objects.append(
|
105 |
+
{
|
106 |
+
"prediction": predicted_label,
|
107 |
+
"x": int(x0),
|
108 |
+
"y": int(y0),
|
109 |
+
"w": int(width),
|
110 |
+
"h": int(height),
|
111 |
+
}
|
112 |
+
)
|
113 |
+
|
114 |
+
return Image.fromarray(np.uint8(out.get_image())).convert("RGB"), detected_objects
|
115 |
+
|
116 |
+
|
117 |
title = "Detic"
|
118 |
|
119 |
description = "Gradio demo for Detic: Detecting Twenty-thousand Classes using Image-level Supervision. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
|
120 |
|
121 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.02605' target='_blank'>Detecting Twenty-thousand Classes using Image-level Supervision</a> | <a href='https://github.com/facebookresearch/Detic' target='_blank'>Github Repo</a></p>"
|
122 |
|
123 |
+
examples = [["desk.jpg"]]
|
124 |
+
gr.Interface(
|
125 |
+
inference,
|
126 |
+
inputs=gr.inputs.Image(type="filepath"),
|
127 |
+
outputs=[
|
128 |
+
gr.outputs.Image(label="Visualization", type="pil"),
|
129 |
+
gr.outputs.JSON(label="Detected Objects"),
|
130 |
+
],
|
131 |
+
enable_queue=True,
|
132 |
+
title=title,
|
133 |
description=description,
|
134 |
article=article,
|
135 |
+
examples=examples,
|
136 |
+
).launch()
|