# Image2Paragraph/utils/image_dense_captions.py
# First version submission by Awiny (commit c3a1897).
import argparse
import multiprocessing as mp
import os
import time
import cv2
import tqdm
import sys
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger
sys.path.insert(0, 'third_party/CenterNet2/projects/CenterNet2/')
from centernet.config import add_centernet_config
from grit.config import add_grit_config
from grit.predictor import VisualizationDemo
import json
# constants
WINDOW_NAME = "GRiT"
def dense_pred_to_caption(predictions):
    """Flatten GRiT dense-captioning output into one caption string.

    Args:
        predictions: dict with an "instances" entry (detectron2 Instances-like)
            exposing `pred_object_descriptions.data` (list of str) and, when
            `has("pred_boxes")`, an indexable `pred_boxes` whose items carry a
            `.tensor` whose first row holds the box coordinates (per the
            `[0]` indexing below).

    Returns:
        str: "description: [x1, y1, x2, y2]; " segments concatenated per
        instance; empty string when there are no descriptions.
    """
    instances = predictions["instances"]
    boxes = instances.pred_boxes if instances.has("pred_boxes") else None
    descriptions = instances.pred_object_descriptions.data
    # Collect pieces and join once instead of quadratic `+=` concatenation.
    parts = []
    for i, description in enumerate(descriptions):
        coords = [int(a) for a in boxes[i].tensor.cpu().detach().numpy()[0]]
        parts.append(f"{description}: {coords}; ")
    return "".join(parts)
def setup_cfg(args):
    """Build a frozen detectron2 config for the GRiT demo from CLI args.

    Args:
        args: parsed argparse namespace; reads `cpu`, `config_file`, `opts`,
            `confidence_threshold`, and `test_task`.

    Returns:
        A frozen detectron2 CfgNode ready for model construction.
    """
    cfg = get_cfg()
    if args.cpu:
        cfg.MODEL.DEVICE="cpu"
    # Register CenterNet2/GRiT config keys before merging file and CLI opts,
    # otherwise merge_from_file would reject the unknown keys.
    add_centernet_config(cfg)
    add_grit_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # Set score_threshold for builtin models (applied after the merges so the
    # CLI threshold wins over any value in the config file).
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
    cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
    if args.test_task:
        cfg.MODEL.TEST_TASK = args.test_task
    # NOTE(review): BEAM_SIZE=1 presumably selects greedy caption decoding;
    # soft-NMS and activation checkpointing are disabled for inference.
    cfg.MODEL.BEAM_SIZE = 1
    cfg.MODEL.ROI_HEADS.SOFT_NMS_ENABLED = False
    cfg.USE_ACT_CHECKPOINT = False
    cfg.freeze()
    return cfg
def get_parser():
    """Create the CLI argument parser for the GRiT dense-captioning demo.

    Returns:
        argparse.ArgumentParser with config-file, device, input-image,
        confidence-threshold, test-task options and free-form `--opts`
        KEY VALUE config overrides.
    """
    parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--cpu", action='store_true', help="Use CPU only.")
    parser.add_argument(
        "--image_src",
        default="../examples/1.jpg",
        # Fixed help text: this option is an image path fed to read_image(),
        # not a json annotation file as the old help claimed.
        help="Path to the input image",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.5,
        help="Minimum score for instance predictions to be shown",
    )
    parser.add_argument(
        "--test-task",
        type=str,
        default='',
        help="Choose a task to have GRiT perform",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        # REMAINDER swallows everything after --opts as raw KEY VALUE tokens.
        nargs=argparse.REMAINDER,
    )
    return parser
if __name__ == "__main__":
    # NOTE(review): "spawn" start method — likely for CUDA/fork safety with
    # detectron2 workers; confirm before changing.
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    setup_logger(name="fvcore")
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)
    demo = VisualizationDemo(cfg)
    if args.image_src:
        img = read_image(args.image_src, format="BGR")
        # (removed: unused `start_time = time.time()` — the value was never read)
        predictions, visualized_output = demo.run_on_image(img)
        new_caption = dense_pred_to_caption(predictions)
        print(new_caption)
        # Persist the caption so downstream scripts can pick it up; explicit
        # UTF-8 so non-ASCII descriptions survive regardless of locale.
        output_file = os.path.expanduser("~/grit_output.txt")
        with open(output_file, 'w', encoding="utf-8") as f:
            f.write(new_caption)