Spaces:

rockeycoss
/

Prompt-Segment-Anything-Demo

Runtime error

Prompt-Segment-Anything-Demo / mmdet /models /dense_heads /base_dense_head.py

RockeyCoss

add code files

51f6859 over 1 year ago

23.2 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	from abc import ABCMeta, abstractmethod

	import torch
	from mmcv.cnn.utils.weight_init import constant_init
	from mmcv.ops import batched_nms
	from mmcv.runner import BaseModule, force_fp32

	from mmdet.core.utils import filter_scores_and_topk, select_single_mlvl


	class BaseDenseHead(BaseModule, metaclass=ABCMeta):
	"""Base class for DenseHeads."""

	def __init__(self, init_cfg=None):
	    """Create the head, delegating entirely to the parent initializer.

	    Args:
	        init_cfg: Initialization config, passed through unchanged to the
	            parent class. Default None.
	    """
	    super().__init__(init_cfg)

	def init_weights(self):
	    """Run the parent initialization, then zero all ``conv_offset`` layers.

	    The reset happens after the parent call so that an ``init_cfg``
	    cannot overwrite the initialization of ``conv_offset``.
	    """
	    super().init_weights()
	    # Submodules exposing ``conv_offset`` are e.g. DeformConv2dPack and
	    # ModulatedDeformConv2dPack.
	    offset_owners = (
	        module for module in self.modules()
	        if hasattr(module, 'conv_offset'))
	    for offset_owner in offset_owners:
	        constant_init(offset_owner.conv_offset, 0)

	@abstractmethod
	def loss(self, **kwargs):
	    """Compute losses of the head.

	    Abstract hook: concrete heads must override it. The base
	    implementation has no body besides this docstring and therefore
	    returns ``None``.
	    """

	@force_fp32(apply_to=('cls_scores', 'bbox_preds'))
	def get_bboxes(self,
	               cls_scores,
	               bbox_preds,
	               score_factors=None,
	               img_metas=None,
	               cfg=None,
	               rescale=False,
	               with_nms=True,
	               **kwargs):
	    """Turn the raw outputs of a batch into per-image bbox results.

	    Priors are generated once for the whole batch from the feature-map
	    sizes of ``cls_scores``; every image is then post-processed on its
	    own by :meth:`_get_bboxes_single`.

	    Note: When ``score_factors`` is given, the classification scores are
	    usually multiplied by it to obtain the score used in NMS, such as
	    CenterNess in FCOS or the IoU branch in ATSS.

	    Args:
	        cls_scores (list[Tensor]): Classification scores of all scale
	            levels, each a 4D-tensor of shape
	            (batch_size, num_priors * num_classes, H, W).
	        bbox_preds (list[Tensor]): Box energies / deltas of all scale
	            levels, each a 4D-tensor of shape
	            (batch_size, num_priors * 4, H, W).
	        score_factors (list[Tensor], Optional): Score factors of all
	            scale levels, each a 4D-tensor of shape
	            (batch_size, num_priors * 1, H, W). Default None.
	        img_metas (list[dict], Optional): Image meta info. The default
	            is None, but the method iterates over this argument, so a
	            sequence has to be supplied in practice.
	        cfg (mmcv.Config, Optional): Test / postprocessing
	            configuration; forwarded as is, ``_get_bboxes_single``
	            falls back to ``self.test_cfg`` when it is None.
	            Default None.
	        rescale (bool): If True, return boxes in original image space.
	            Default False.
	        with_nms (bool): If True, apply NMS before returning boxes.
	            Default True.
	        **kwargs: Forwarded unchanged to ``_get_bboxes_single``.

	    Returns:
	        list: One entry per element of ``img_metas``, each being the
	        return value of ``_get_bboxes_single`` for that image. With
	        ``with_nms=True`` this is a 2-tuple: an (n, 5) tensor whose
	        first 4 columns are box positions (tl_x, tl_y, br_x, br_y) and
	        whose 5-th column is a score between 0 and 1, and an (n,)
	        tensor holding the predicted class label of each box.
	    """
	    num_levels = len(cls_scores)
	    assert num_levels == len(bbox_preds)

	    # No score factors: e.g. Retina, FreeAnchor, Foveabox.
	    # With score factors: e.g. FCOS, PAA, ATSS, AutoAssign.
	    with_score_factors = score_factors is not None
	    if with_score_factors:
	        assert num_levels == len(score_factors)

	    featmap_sizes = [level_score.shape[-2:] for level_score in cls_scores]
	    first_level = cls_scores[0]
	    mlvl_priors = self.prior_generator.grid_priors(
	        featmap_sizes,
	        dtype=first_level.dtype,
	        device=first_level.device)

	    result_list = []
	    for img_id, img_meta in enumerate(img_metas):
	        img_cls_scores = select_single_mlvl(cls_scores, img_id)
	        img_bbox_preds = select_single_mlvl(bbox_preds, img_id)
	        # A list of ``None`` keeps the per-level zip in
	        # ``_get_bboxes_single`` aligned when there are no factors.
	        img_score_factors = (
	            select_single_mlvl(score_factors, img_id)
	            if with_score_factors else [None] * num_levels)

	        result_list.append(
	            self._get_bboxes_single(img_cls_scores, img_bbox_preds,
	                                    img_score_factors, mlvl_priors,
	                                    img_meta, cfg, rescale, with_nms,
	                                    **kwargs))
	    return result_list

	def _get_bboxes_single(self,
	                       cls_score_list,
	                       bbox_pred_list,
	                       score_factor_list,
	                       mlvl_priors,
	                       img_meta,
	                       cfg,
	                       rescale=False,
	                       with_nms=True,
	                       **kwargs):
	    """Transform the outputs of a single image into bbox predictions.

	    Each level is flattened, converted to scores, filtered by score
	    threshold / top-k and decoded; the per-level results are then handed
	    to :meth:`_bbox_post_process`.

	    Args:
	        cls_score_list (list[Tensor]): Box scores from all scale
	            levels of a single image, each item has shape
	            (num_priors * num_classes, H, W).
	        bbox_pred_list (list[Tensor]): Box energies / deltas from
	            all scale levels of a single image, each item has shape
	            (num_priors * 4, H, W).
	        score_factor_list (list[Tensor]): Score factors from all scale
	            levels of a single image, each item has shape
	            (num_priors * 1, H, W). A first element of None means the
	            head has no score factors.
	        mlvl_priors (list[Tensor]): Priors of each level of the feature
	            pyramid. In all anchor-based methods, each has shape
	            (num_priors, 4). In all anchor-free methods, each has shape
	            (num_priors, 2) when `with_stride=True`, otherwise it still
	            has shape (num_priors, 4).
	        img_meta (dict): Image meta info; the keys ``'img_shape'`` and
	            ``'scale_factor'`` are read.
	        cfg (mmcv.Config): Test / postprocessing configuration,
	            if None, ``self.test_cfg`` is used.
	        rescale (bool): If True, return boxes in original image space.
	            Default: False.
	        with_nms (bool): If True, apply NMS before returning boxes.
	            Default: True.
	        **kwargs: Forwarded unchanged to ``_bbox_post_process``.

	    Returns:
	        tuple[Tensor]: The return value of ``_bbox_post_process``. If
	        ``with_nms`` is True:

	            - det_bboxes (Tensor): Predicted bboxes with shape \
	                [num_bboxes, 5], where the first 4 columns are bounding \
	                box positions (tl_x, tl_y, br_x, br_y) and the 5-th \
	                column are scores between 0 and 1.
	            - det_labels (Tensor): Predicted labels of the corresponding \
	                box with shape [num_bboxes].

	        If ``with_nms`` is False (usually for aug test), the
	        concatenated ``(bboxes, scores, labels)`` of all levels.
	    """
	    # No score factors: e.g. Retina, FreeAnchor.
	    # With score factors: e.g. FCOS, PAA, ATSS.
	    with_score_factors = score_factor_list[0] is not None

	    if cfg is None:
	        cfg = self.test_cfg
	    img_shape = img_meta['img_shape']
	    nms_pre = cfg.get('nms_pre', -1)

	    mlvl_bboxes, mlvl_scores, mlvl_labels = [], [], []
	    mlvl_score_factors = [] if with_score_factors else None

	    per_level_inputs = zip(cls_score_list, bbox_pred_list,
	                           score_factor_list, mlvl_priors)
	    for cls_score, bbox_pred, score_factor, priors in per_level_inputs:
	        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

	        # Move channels last, then flatten to one row per prior.
	        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
	        if with_score_factors:
	            flat_factor = score_factor.permute(1, 2, 0).reshape(-1)
	            score_factor = flat_factor.sigmoid()
	        cls_score = cls_score.permute(1, 2, 0).reshape(
	            -1, self.cls_out_channels)

	        if self.use_sigmoid_cls:
	            scores = cls_score.sigmoid()
	        else:
	            # remind that we set FG labels to [0, num_class-1]
	            # since mmdet v2.0
	            # BG cat_id: num_class
	            scores = cls_score.softmax(-1)[:, :-1]

	        # After https://github.com/open-mmlab/mmdetection/pull/6268/,
	        # this operation keeps fewer bboxes under the same `nms_pre`.
	        # There is no difference in performance for most models. If you
	        # find a slight drop in performance, you can set a larger
	        # `nms_pre` than before.
	        scores, labels, keep_idxs, filtered = filter_scores_and_topk(
	            scores, cfg.score_thr, nms_pre,
	            {'bbox_pred': bbox_pred, 'priors': priors})

	        bboxes = self.bbox_coder.decode(
	            filtered['priors'], filtered['bbox_pred'], max_shape=img_shape)

	        mlvl_bboxes.append(bboxes)
	        mlvl_scores.append(scores)
	        mlvl_labels.append(labels)
	        if with_score_factors:
	            # Keep only the factors of the priors that survived filtering.
	            mlvl_score_factors.append(score_factor[keep_idxs])

	    return self._bbox_post_process(mlvl_scores, mlvl_labels, mlvl_bboxes,
	                                   img_meta['scale_factor'], cfg, rescale,
	                                   with_nms, mlvl_score_factors, **kwargs)

	def _bbox_post_process(self,
	                       mlvl_scores,
	                       mlvl_labels,
	                       mlvl_bboxes,
	                       scale_factor,
	                       cfg,
	                       rescale=False,
	                       with_nms=True,
	                       mlvl_score_factors=None,
	                       **kwargs):
	    """Merge the per-level results of one image and optionally run NMS.

	    The levels are concatenated, the boxes are optionally divided by
	    ``scale_factor`` to map them back to the original image, the scores
	    are optionally multiplied by the score factors, and finally NMS is
	    applied unless ``with_nms`` is False (usually for aug test).

	    Args:
	        mlvl_scores (list[Tensor]): Box scores from all scale
	            levels of a single image, each item has shape
	            (num_bboxes, ).
	        mlvl_labels (list[Tensor]): Box class labels from all scale
	            levels of a single image, each item has shape
	            (num_bboxes, ).
	        mlvl_bboxes (list[Tensor]): Decoded bboxes from all scale
	            levels of a single image, each item has shape
	            (num_bboxes, 4).
	        scale_factor (ndarray, optional): Scale factor of the image,
	            arranged as (w_scale, h_scale, w_scale, h_scale). Only read
	            when ``rescale`` is True.
	        cfg (mmcv.Config): Test / postprocessing configuration. This
	            method reads ``cfg.nms`` and ``cfg.max_per_img`` directly
	            and has no fallback for None.
	        rescale (bool): If True, return boxes in original image space.
	            Default: False.
	        with_nms (bool): If True, apply NMS before returning boxes.
	            Default: True.
	        mlvl_score_factors (list[Tensor], optional): Score factors from
	            all scale levels of a single image, each item has shape
	            (num_bboxes, ). Default: None.
	        **kwargs: Accepted for interface compatibility; not used here.

	    Returns:
	        tuple[Tensor]: If ``with_nms`` is True:

	            - det_bboxes (Tensor): Predicted bboxes with shape \
	                [num_bboxes, 5], where the first 4 columns are bounding \
	                box positions (tl_x, tl_y, br_x, br_y) and the 5-th \
	                column are scores between 0 and 1.
	            - det_labels (Tensor): Predicted labels of the corresponding \
	                box with shape [num_bboxes].

	        If ``with_nms`` is False, the concatenated
	        ``(bboxes, scores, labels)`` of all levels.
	    """
	    assert len(mlvl_scores) == len(mlvl_bboxes) == len(mlvl_labels)

	    bboxes = torch.cat(mlvl_bboxes)
	    if rescale:
	        # In-place is safe: ``bboxes`` is the fresh tensor made by cat.
	        bboxes /= bboxes.new_tensor(scale_factor)
	    scores = torch.cat(mlvl_scores)
	    labels = torch.cat(mlvl_labels)

	    if mlvl_score_factors is not None:
	        # TODO: Add sqrt operation in order to be consistent with
	        # the paper.
	        scores = scores * torch.cat(mlvl_score_factors)

	    if not with_nms:
	        return bboxes, scores, labels

	    if bboxes.numel() == 0:
	        # Nothing to suppress: just attach the (empty) score column.
	        return torch.cat([bboxes, scores[:, None]], -1), labels

	    det_bboxes, keep_idxs = batched_nms(bboxes, scores, labels, cfg.nms)
	    max_per_img = cfg.max_per_img
	    return det_bboxes[:max_per_img], labels[keep_idxs][:max_per_img]

	def forward_train(self,
	                  x,
	                  img_metas,
	                  gt_bboxes,
	                  gt_labels=None,
	                  gt_bboxes_ignore=None,
	                  proposal_cfg=None,
	                  **kwargs):
	    """Run the head on the features and compute the training losses.

	    Args:
	        x (list[Tensor]): Features from FPN.
	        img_metas (list[dict]): Meta information of each image, e.g.,
	            image size, scaling factor, etc.
	        gt_bboxes (Tensor): Ground truth bboxes of the image,
	            shape (num_gts, 4).
	        gt_labels (Tensor): Ground truth labels of each box,
	            shape (num_gts,). When None it is left out of the
	            arguments passed to ``loss``.
	        gt_bboxes_ignore (Tensor): Ground truth bboxes to be
	            ignored, shape (num_ignored_gts, 4).
	        proposal_cfg (mmcv.Config): Test / postprocessing configuration
	            passed as ``cfg`` to ``get_bboxes``. If None, no proposals
	            are generated and only the losses are returned.
	        **kwargs: Accepted for interface compatibility; not used here.

	    Returns:
	        dict[str, Tensor] | tuple: The loss components when
	        ``proposal_cfg`` is None, otherwise the 2-tuple
	        ``(losses, proposal_list)`` where ``proposal_list`` holds the
	        proposals of each image.
	    """
	    outs = self(x)
	    targets = ((gt_bboxes, img_metas) if gt_labels is None else
	               (gt_bboxes, gt_labels, img_metas))
	    # ``outs`` must be a tuple for this concatenation to work.
	    loss_inputs = outs + targets
	    losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)

	    if proposal_cfg is None:
	        return losses

	    proposal_list = self.get_bboxes(
	        *outs, img_metas=img_metas, cfg=proposal_cfg)
	    return losses, proposal_list

	def simple_test(self, feats, img_metas, rescale=False):
	    """Test function without test-time augmentation.

	    Thin wrapper around ``self.simple_test_bboxes``.
	    NOTE(review): ``simple_test_bboxes`` is not defined in this class;
	    presumably a mixin or the concrete subclass provides it - confirm.

	    Args:
	        feats (tuple[torch.Tensor]): Multi-level features from the
	            upstream network, each is a 4D-tensor.
	        img_metas (list[dict]): List of image information.
	        rescale (bool, optional): Whether to rescale the results.
	            Defaults to False.

	    Returns:
	        list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
	        The first item is ``bboxes`` with shape (n, 5),
	        where 5 represent (tl_x, tl_y, br_x, br_y, score).
	        The second item is ``labels`` with shape (n, ).
	    """
	    det_results = self.simple_test_bboxes(
	        feats, img_metas, rescale=rescale)
	    return det_results

	@force_fp32(apply_to=('cls_scores', 'bbox_preds'))
	def onnx_export(self,
	cls_scores,
	bbox_preds,
	score_factors=None,
	img_metas=None,
	with_nms=True):
	"""Transform network output for a batch into bbox predictions.

	Export-oriented counterpart of ``get_bboxes``: all levels are processed
	in batched form, an optional per-level top-k selection is applied, the
	boxes are decoded and the result is either passed to
	``add_dummy_nms_for_onnx`` or returned without NMS. The configuration
	is always taken from ``self.test_cfg``.

	Args:
	cls_scores (list[Tensor]): Box scores for each scale level
	with shape (N, num_points * num_classes, H, W).
	bbox_preds (list[Tensor]): Box energies / deltas for each scale
	level with shape (N, num_points * 4, H, W).
	score_factors (list[Tensor]): score_factors for each scale
	level with shape (N, num_points * 1, H, W).
	Default: None.
	img_metas (list[dict]): Meta information of each image, e.g.,
	image size, scaling factor, etc. Although the default is None,
	exactly one entry is required (asserted below) and its
	``'img_shape_for_onnx'`` key is read.
	with_nms (bool): Whether apply nms to the bboxes. Default: True.

	Returns:
	tuple[Tensor, Tensor] | list[tuple]: When `with_nms` is True,
	it is tuple[Tensor, Tensor], first tensor bboxes with shape
	[N, num_det, 5], 5 arrange as (x1, y1, x2, y2, score)
	and second element is class labels of shape [N, num_det].
	When `with_nms` is False, first tensor is bboxes with
	shape [N, num_det, 4], second tensor is raw score has
	shape [N, num_det, num_classes].
	"""
	assert len(cls_scores) == len(bbox_preds)

	num_levels = len(cls_scores)

	# Spatial size of every level, used to generate the priors.
	featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
	mlvl_priors = self.prior_generator.grid_priors(
	featmap_sizes,
	dtype=bbox_preds[0].dtype,
	device=bbox_preds[0].device)

	# Use detached tensors for every level from here on.
	mlvl_cls_scores = [cls_scores[i].detach() for i in range(num_levels)]
	mlvl_bbox_preds = [bbox_preds[i].detach() for i in range(num_levels)]

	assert len(
	img_metas
	) == 1, 'Only support one input image while in exporting to ONNX'
	# NOTE(review): 'img_shape_for_onnx' is not set anywhere in this file;
	# the caller has to put it into img_metas[0].
	img_shape = img_metas[0]['img_shape_for_onnx']

	cfg = self.test_cfg
	assert len(cls_scores) == len(bbox_preds) == len(mlvl_priors)
	device = cls_scores[0].device
	batch_size = cls_scores[0].shape[0]
	# convert to tensor to keep tracing
	nms_pre_tensor = torch.tensor(
	cfg.get('nms_pre', -1), device=device, dtype=torch.long)

	# Two similarly named lists are used below: `mlvl_score_factor` holds
	# the detached per-level inputs, `mlvl_score_factors` collects the
	# processed per-level outputs.
	# e.g. Retina, FreeAnchor, etc.
	if score_factors is None:
	with_score_factors = False
	mlvl_score_factor = [None for _ in range(num_levels)]
	else:
	# e.g. FCOS, PAA, ATSS, etc.
	with_score_factors = True
	mlvl_score_factor = [
	score_factors[i].detach() for i in range(num_levels)
	]
	mlvl_score_factors = []

	mlvl_batch_bboxes = []
	mlvl_scores = []

	# NOTE: the loop variable `score_factors` rebinds the name of the
	# `score_factors` parameter; the parameter is not needed any more
	# because its levels were captured in `mlvl_score_factor` above.
	for cls_score, bbox_pred, score_factors, priors in zip(
	mlvl_cls_scores, mlvl_bbox_preds, mlvl_score_factor,
	mlvl_priors):
	assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

	# Channels last, then one row of class scores per prior.
	scores = cls_score.permute(0, 2, 3,
	1).reshape(batch_size, -1,
	self.cls_out_channels)
	# Both branches use the activated scores as the ranking score.
	if self.use_sigmoid_cls:
	scores = scores.sigmoid()
	nms_pre_score = scores
	else:
	scores = scores.softmax(-1)
	nms_pre_score = scores

	if with_score_factors:
	score_factors = score_factors.permute(0, 2, 3, 1).reshape(
	batch_size, -1).sigmoid()
	bbox_pred = bbox_pred.permute(0, 2, 3,
	1).reshape(batch_size, -1, 4)
	priors = priors.expand(batch_size, -1, priors.size(-1))
	# Get top-k predictions
	# (function-local import, executed on every loop iteration)
	from mmdet.core.export import get_k_for_topk
	# NOTE(review): presumably yields a k usable with this level's number
	# of priors; a non-positive value skips the selection - confirm
	# against get_k_for_topk.
	nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1])
	if nms_pre > 0:

	if with_score_factors:
	nms_pre_score = (nms_pre_score * score_factors[..., None])
	else:
	# No score factors: the ranking score stays as is (no-op).
	nms_pre_score = nms_pre_score

	# Get maximum scores for foreground classes.
	if self.use_sigmoid_cls:
	max_scores, _ = nms_pre_score.max(-1)
	else:
	# remind that we set FG labels to [0, num_class-1]
	# since mmdet v2.0
	# BG cat_id: num_class
	max_scores, _ = nms_pre_score[..., :-1].max(-1)
	_, topk_inds = max_scores.topk(nms_pre)

	# Turn the per-image top-k indices into indices of the tensors
	# flattened over (batch, prior): row = image * num_priors + prior.
	batch_inds = torch.arange(
	batch_size, device=bbox_pred.device).view(
	-1, 1).expand_as(topk_inds).long()
	# Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501
	transformed_inds = bbox_pred.shape[1] * batch_inds + topk_inds
	priors = priors.reshape(
	-1, priors.size(-1))[transformed_inds, :].reshape(
	batch_size, -1, priors.size(-1))
	bbox_pred = bbox_pred.reshape(-1,
	4)[transformed_inds, :].reshape(
	batch_size, -1, 4)
	scores = scores.reshape(
	-1, self.cls_out_channels)[transformed_inds, :].reshape(
	batch_size, -1, self.cls_out_channels)
	if with_score_factors:
	score_factors = score_factors.reshape(
	-1, 1)[transformed_inds].reshape(batch_size, -1)

	bboxes = self.bbox_coder.decode(
	priors, bbox_pred, max_shape=img_shape)

	mlvl_batch_bboxes.append(bboxes)
	mlvl_scores.append(scores)
	if with_score_factors:
	mlvl_score_factors.append(score_factors)

	# Concatenate all levels along the prior dimension.
	batch_bboxes = torch.cat(mlvl_batch_bboxes, dim=1)
	batch_scores = torch.cat(mlvl_scores, dim=1)
	if with_score_factors:
	batch_score_factors = torch.cat(mlvl_score_factors, dim=1)

	# Replace multiclass_nms with ONNX::NonMaxSuppression in deployment

	from mmdet.core.export import add_dummy_nms_for_onnx

	# Softmax heads: keep only the first `num_classes` columns (per the
	# comment above, the background class is the last one).
	if not self.use_sigmoid_cls:
	batch_scores = batch_scores[..., :self.num_classes]

	if with_score_factors:
	batch_scores = batch_scores * (batch_score_factors.unsqueeze(2))

	if with_nms:
	max_output_boxes_per_class = cfg.nms.get(
	'max_output_boxes_per_class', 200)
	iou_threshold = cfg.nms.get('iou_threshold', 0.5)
	score_threshold = cfg.score_thr
	# A separate config key ('deploy_nms_pre') is read here; it is distinct
	# from the 'nms_pre' key used for the per-level top-k above.
	nms_pre = cfg.get('deploy_nms_pre', -1)
	return add_dummy_nms_for_onnx(batch_bboxes, batch_scores,
	max_output_boxes_per_class,
	iou_threshold, score_threshold,
	nms_pre, cfg.max_per_img)
	else:
	return batch_bboxes, batch_scores