# Image2Paragraph/utils/image_dense_captions.py
# First version submission by Awiny (commit c3a1897).
import argparse
import multiprocessing as mp
import os
import time
import cv2
import tqdm
import sys
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger
sys.path.insert(0, 'third_party/CenterNet2/projects/CenterNet2/')
from centernet.config import add_centernet_config
from grit.config import add_grit_config
from grit.predictor import VisualizationDemo
import json
# constants
WINDOW_NAME = "GRiT"
def dense_pred_to_caption(predictions):
    """Flatten GRiT dense-captioning output into one caption string.

    Args:
        predictions: dict with an "instances" entry (detectron2 Instances-like)
            exposing `pred_object_descriptions.data` (list of str) and, when
            `has("pred_boxes")`, an indexable `pred_boxes` whose items carry a
            `.tensor` whose first row holds the box coordinates (per the
            `[0]` indexing below).

    Returns:
        str: "description: [x1, y1, x2, y2]; " segments concatenated per
        instance; empty string when there are no descriptions.
    """
    instances = predictions["instances"]
    boxes = instances.pred_boxes if instances.has("pred_boxes") else None
    descriptions = instances.pred_object_descriptions.data
    # Collect pieces and join once instead of quadratic `+=` concatenation.
    parts = []
    for i, description in enumerate(descriptions):
        coords = [int(a) for a in boxes[i].tensor.cpu().detach().numpy()[0]]
        parts.append(f"{description}: {coords}; ")
    return "".join(parts)
def setup_cfg(args):
    """Build a frozen detectron2 config for the GRiT demo from CLI args.

    Args:
        args: parsed argparse namespace; reads `cpu`, `config_file`, `opts`,
            `confidence_threshold`, and `test_task`.

    Returns:
        A frozen detectron2 CfgNode ready for model construction.
    """
    cfg = get_cfg()
    if args.cpu:
        cfg.MODEL.DEVICE="cpu"
    # Register CenterNet2/GRiT config keys before merging file and CLI opts,
    # otherwise merge_from_file would reject the unknown keys.
    add_centernet_config(cfg)
    add_grit_config(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # Set score_threshold for builtin models (applied after the merges so the
    # CLI threshold wins over any value in the config file).
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
    cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
    if args.test_task:
        cfg.MODEL.TEST_TASK = args.test_task
    # NOTE(review): BEAM_SIZE=1 presumably selects greedy caption decoding;
    # soft-NMS and activation checkpointing are disabled for inference.
    cfg.MODEL.BEAM_SIZE = 1
    cfg.MODEL.ROI_HEADS.SOFT_NMS_ENABLED = False
    cfg.USE_ACT_CHECKPOINT = False
    cfg.freeze()
    return cfg
def get_parser():
    """Create the CLI argument parser for the GRiT dense-captioning demo.

    Returns:
        argparse.ArgumentParser with config-file, device, input-image,
        confidence-threshold, test-task options and free-form `--opts`
        KEY VALUE config overrides.
    """
    parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--cpu", action='store_true', help="Use CPU only.")
    parser.add_argument(
        "--image_src",
        default="../examples/1.jpg",
        # Fixed help text: this option is an image path fed to read_image(),
        # not a json annotation file as the old help claimed.
        help="Path to the input image",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.5,
        help="Minimum score for instance predictions to be shown",
    )
    parser.add_argument(
        "--test-task",
        type=str,
        default='',
        help="Choose a task to have GRiT perform",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line 'KEY VALUE' pairs",
        default=[],
        # REMAINDER swallows everything after --opts as raw KEY VALUE tokens.
        nargs=argparse.REMAINDER,
    )
    return parser
if __name__ == "__main__":
    # NOTE(review): "spawn" start method — likely for CUDA/fork safety with
    # detectron2 workers; confirm before changing.
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    setup_logger(name="fvcore")
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)
    demo = VisualizationDemo(cfg)
    if args.image_src:
        img = read_image(args.image_src, format="BGR")
        # (removed: unused `start_time = time.time()` — the value was never read)
        predictions, visualized_output = demo.run_on_image(img)
        new_caption = dense_pred_to_caption(predictions)
        print(new_caption)
        # Persist the caption so downstream scripts can pick it up; explicit
        # UTF-8 so non-ASCII descriptions survive regardless of locale.
        output_file = os.path.expanduser("~/grit_output.txt")
        with open(output_file, 'w', encoding="utf-8") as f:
            f.write(new_caption)