Spaces:

rockeycoss
/

Prompt-Segment-Anything-Demo

Runtime error

Prompt-Segment-Anything-Demo / mmdet /models /dense_heads /anchor_head.py

RockeyCoss

add code files”

51f6859 over 1 year ago

24.6 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import warnings

	import torch
	import torch.nn as nn
	from mmcv.runner import force_fp32

	from mmdet.core import (anchor_inside_flags, build_assigner, build_bbox_coder,
	build_prior_generator, build_sampler, images_to_levels,
	multi_apply, unmap)
	from ..builder import HEADS, build_loss
	from .base_dense_head import BaseDenseHead
	from .dense_test_mixins import BBoxTestMixin


	@HEADS.register_module()
	class AnchorHead(BaseDenseHead, BBoxTestMixin):
	    """Anchor-based head (RPN, RetinaNet, SSD, etc.).

	    For every prior (anchor) on every feature level the head predicts a
	    set of classification scores and four box-regression values. It also
	    provides the anchor generation, target assignment and loss computation
	    used during training.

	    Args:
	        num_classes (int): Number of categories excluding the background
	            category.
	        in_channels (int): Number of channels in the input feature map.
	        feat_channels (int): Number of hidden channels. Used in child
	            classes. Default: 256.
	        anchor_generator (dict): Config dict for the anchor (prior)
	            generator.
	        bbox_coder (dict): Config of bounding box coder.
	        reg_decoded_bbox (bool): If true, the regression loss would be
	            applied directly on decoded bounding boxes, converting both
	            the predicted boxes and regression targets to absolute
	            coordinates format. Default False. It should be `True` when
	            using `IoULoss`, `GIoULoss`, or `DIoULoss` in the bbox head.
	        loss_cls (dict): Config of classification loss. If it sets
	            ``use_sigmoid=True`` the head outputs ``num_classes`` channels
	            per prior, otherwise ``num_classes + 1``.
	        loss_bbox (dict): Config of localization loss.
	        train_cfg (dict, optional): Training config of anchor head. When
	            given, ``assigner`` is read at construction time, ``sampler``
	            is optional, and ``allowed_border`` / ``pos_weight`` are read
	            while computing targets.
	        test_cfg (dict, optional): Testing config of anchor head.
	        init_cfg (dict or list[dict], optional): Initialization config
	            dict.
	    """

	    def __init__(self,
	                 num_classes,
	                 in_channels,
	                 feat_channels=256,
	                 anchor_generator=dict(
	                     type='AnchorGenerator',
	                     scales=[8, 16, 32],
	                     ratios=[0.5, 1.0, 2.0],
	                     strides=[4, 8, 16, 32, 64]),
	                 bbox_coder=dict(
	                     type='DeltaXYWHBBoxCoder',
	                     clip_border=True,
	                     target_means=(.0, .0, .0, .0),
	                     target_stds=(1.0, 1.0, 1.0, 1.0)),
	                 reg_decoded_bbox=False,
	                 loss_cls=dict(
	                     type='CrossEntropyLoss',
	                     use_sigmoid=True,
	                     loss_weight=1.0),
	                 loss_bbox=dict(
	                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
	                 train_cfg=None,
	                 test_cfg=None,
	                 init_cfg=dict(type='Normal', layer='Conv2d', std=0.01)):
	        """Build the coder, losses, assigner/sampler, priors and layers.

	        See the class docstring for the meaning of each argument.

	        Raises:
	            ValueError: If the resulting number of classification output
	                channels is not positive.
	        """
	        super(AnchorHead, self).__init__(init_cfg)
	        self.in_channels = in_channels
	        self.num_classes = num_classes
	        self.feat_channels = feat_channels

	        # With a sigmoid classifier there is one channel per class; without
	        # it one extra channel is added (label ``num_classes`` is used as
	        # the background label when targets are built).
	        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
	        if self.use_sigmoid_cls:
	            self.cls_out_channels = num_classes
	        else:
	            self.cls_out_channels = num_classes + 1

	        if self.cls_out_channels <= 0:
	            raise ValueError(f'num_classes={num_classes} is too small')
	        self.reg_decoded_bbox = reg_decoded_bbox

	        self.bbox_coder = build_bbox_coder(bbox_coder)
	        self.loss_cls = build_loss(loss_cls)
	        self.loss_bbox = build_loss(loss_bbox)
	        self.train_cfg = train_cfg
	        self.test_cfg = test_cfg
	        if self.train_cfg:
	            self.assigner = build_assigner(self.train_cfg.assigner)
	            has_real_sampler = (
	                hasattr(self.train_cfg, 'sampler')
	                and self.train_cfg.sampler.type.split('.')[-1] !=
	                'PseudoSampler')
	            if has_real_sampler:
	                self.sampling = True
	                sampler_cfg = self.train_cfg.sampler
	                # avoid BC-breaking: focal-style losses historically
	                # implied "no sampling" even if a sampler was configured.
	                if loss_cls['type'] in [
	                        'FocalLoss', 'GHMC', 'QualityFocalLoss'
	                ]:
	                    # Adjacent literals are concatenated verbatim, so each
	                    # fragment must end with its own separating space.
	                    warnings.warn(
	                        'DeprecationWarning: Determining whether to sampling '
	                        'by loss type is deprecated, please delete sampler in '
	                        'your config when using `FocalLoss`, `GHMC`, '
	                        '`QualityFocalLoss` or other FocalLoss variant.')
	                    self.sampling = False
	                    sampler_cfg = dict(type='PseudoSampler')
	            else:
	                self.sampling = False
	                sampler_cfg = dict(type='PseudoSampler')
	            self.sampler = build_sampler(sampler_cfg, context=self)
	        self.fp16_enabled = False

	        self.prior_generator = build_prior_generator(anchor_generator)

	        # Usually the numbers of anchors for each level are the same
	        # except SSD detectors. So it is an int in the most dense
	        # heads but a list of int in SSDHead
	        self.num_base_priors = self.prior_generator.num_base_priors[0]
	        self._init_layers()

	    @property
	    def num_anchors(self):
	        """int: Deprecated alias for the number of base priors of the first
	        level; emits a warning on every access."""
	        warnings.warn('DeprecationWarning: `num_anchors` is deprecated, '
	                      'for consistency or also use '
	                      '`num_base_priors` instead')
	        return self.prior_generator.num_base_priors[0]

	    @property
	    def anchor_generator(self):
	        """Deprecated alias of ``self.prior_generator``; emits a warning on
	        every access."""
	        warnings.warn('DeprecationWarning: anchor_generator is deprecated, '
	                      'please use "prior_generator" instead')
	        return self.prior_generator

	    def _init_layers(self):
	        """Initialize layers of the head.

	        Creates two 1x1 convolutions on ``in_channels`` inputs: one with
	        ``num_base_priors * cls_out_channels`` outputs for classification
	        and one with ``num_base_priors * 4`` outputs for box regression.
	        """
	        self.conv_cls = nn.Conv2d(self.in_channels,
	                                  self.num_base_priors * self.cls_out_channels,
	                                  1)
	        self.conv_reg = nn.Conv2d(self.in_channels, self.num_base_priors * 4,
	                                  1)

	    def forward_single(self, x):
	        """Forward feature of a single scale level.

	        Args:
	            x (Tensor): Features of a single scale level.

	        Returns:
	            tuple:
	                cls_score (Tensor): Cls scores for a single scale level,
	                    the channels number is
	                    num_base_priors * cls_out_channels.
	                bbox_pred (Tensor): Box energies / deltas for a single
	                    scale level, the channels number is
	                    num_base_priors * 4.
	        """
	        cls_score = self.conv_cls(x)
	        bbox_pred = self.conv_reg(x)
	        return cls_score, bbox_pred

	    def forward(self, feats):
	        """Forward features from the upstream network.

	        Applies :meth:`forward_single` to every feature level through
	        ``multi_apply``.

	        Args:
	            feats (tuple[Tensor]): Features from the upstream network, each
	                is a 4D-tensor.

	        Returns:
	            tuple: A tuple of classification scores and bbox prediction.

	                - cls_scores (list[Tensor]): Classification scores for all
	                  scale levels, each is a 4D-tensor, the channels number
	                  is num_base_priors * cls_out_channels.
	                - bbox_preds (list[Tensor]): Box energies / deltas for all
	                  scale levels, each is a 4D-tensor, the channels number
	                  is num_base_priors * 4.
	        """
	        return multi_apply(self.forward_single, feats)

	    def get_anchors(self, featmap_sizes, img_metas, device='cuda'):
	        """Get anchors according to feature map sizes.

	        Args:
	            featmap_sizes (list[tuple]): Multi-level feature map sizes.
	            img_metas (list[dict]): Image meta info. Each dict must
	                contain the key ``'pad_shape'``.
	            device (torch.device | str): Device for returned tensors.

	        Returns:
	            tuple:
	                anchor_list (list[list[Tensor]]): Multi-level anchors of
	                    each image.
	                valid_flag_list (list[list[Tensor]]): Multi-level valid
	                    flags of each image.
	        """
	        num_imgs = len(img_metas)

	        # since feature map sizes of all images are the same, we only compute
	        # anchors for one time; every image entry refers to the same
	        # multi-level list object.
	        multi_level_anchors = self.prior_generator.grid_priors(
	            featmap_sizes, device=device)
	        anchor_list = [multi_level_anchors for _ in range(num_imgs)]

	        # for each image, we compute valid flags of multi level anchors
	        valid_flag_list = [
	            self.prior_generator.valid_flags(featmap_sizes,
	                                             img_meta['pad_shape'], device)
	            for img_meta in img_metas
	        ]

	        return anchor_list, valid_flag_list

	    def _get_targets_single(self,
	                            flat_anchors,
	                            valid_flags,
	                            gt_bboxes,
	                            gt_bboxes_ignore,
	                            gt_labels,
	                            img_meta,
	                            label_channels=1,
	                            unmap_outputs=True):
	        """Compute regression and classification targets for anchors in a
	        single image.

	        Args:
	            flat_anchors (Tensor): Multi-level anchors of the image, which
	                are concatenated into a single tensor of shape
	                (num_anchors, 4).
	            valid_flags (Tensor): Multi level valid flags of the image,
	                which are concatenated into a single tensor of
	                shape (num_anchors,).
	            gt_bboxes (Tensor): Ground truth bboxes of the image,
	                shape (num_gts, 4).
	            gt_bboxes_ignore (Tensor): Ground truth bboxes to be
	                ignored, shape (num_ignored_gts, 4).
	            gt_labels (Tensor): Ground truth labels of each box,
	                shape (num_gts,). ``None`` marks every positive anchor
	                with label 0.
	            img_meta (dict): Meta info of the image. The key
	                ``'img_shape'`` is read.
	            label_channels (int): Channel of label. Not used by this
	                implementation.
	            unmap_outputs (bool): Whether to map outputs back to the
	                original set of anchors.

	        Returns:
	            tuple: Seven ``None`` values if no anchor lies inside the
	            allowed border, otherwise:

	                - labels (Tensor): Label of each anchor; anchors that are
	                  not positive keep the value ``self.num_classes``.
	                - label_weights (Tensor): Classification weight of each
	                  anchor.
	                - bbox_targets (Tensor): Regression target of each anchor.
	                - bbox_weights (Tensor): Regression weight of each anchor.
	                - pos_inds (Tensor): Indices of positive samples among the
	                  anchors inside the border.
	                - neg_inds (Tensor): Indices of negative samples among the
	                  anchors inside the border.
	                - sampling_result: Result object returned by the sampler.
	        """
	        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
	                                           img_meta['img_shape'][:2],
	                                           self.train_cfg.allowed_border)
	        if not inside_flags.any():
	            return (None, ) * 7
	        # assign gt and sample anchors
	        anchors = flat_anchors[inside_flags, :]

	        # The assigner only receives labels when no real sampling is done.
	        assign_result = self.assigner.assign(
	            anchors, gt_bboxes, gt_bboxes_ignore,
	            None if self.sampling else gt_labels)
	        sampling_result = self.sampler.sample(assign_result, anchors,
	                                              gt_bboxes)

	        num_valid_anchors = anchors.shape[0]
	        bbox_targets = torch.zeros_like(anchors)
	        bbox_weights = torch.zeros_like(anchors)
	        # Every anchor starts as background (label == num_classes) with a
	        # zero classification weight.
	        labels = anchors.new_full((num_valid_anchors, ),
	                                  self.num_classes,
	                                  dtype=torch.long)
	        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)

	        pos_inds = sampling_result.pos_inds
	        neg_inds = sampling_result.neg_inds
	        if len(pos_inds) > 0:
	            if not self.reg_decoded_bbox:
	                pos_bbox_targets = self.bbox_coder.encode(
	                    sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
	            else:
	                # Loss is computed on decoded boxes, so the raw gt boxes
	                # are the targets.
	                pos_bbox_targets = sampling_result.pos_gt_bboxes
	            bbox_targets[pos_inds, :] = pos_bbox_targets
	            bbox_weights[pos_inds, :] = 1.0
	            if gt_labels is None:
	                # Only rpn gives gt_labels as None
	                # Foreground is the first class since v2.5.0
	                labels[pos_inds] = 0
	            else:
	                labels[pos_inds] = gt_labels[
	                    sampling_result.pos_assigned_gt_inds]
	            # A non-positive ``pos_weight`` means "use weight 1".
	            if self.train_cfg.pos_weight <= 0:
	                label_weights[pos_inds] = 1.0
	            else:
	                label_weights[pos_inds] = self.train_cfg.pos_weight
	        if len(neg_inds) > 0:
	            label_weights[neg_inds] = 1.0

	        # map up to original set of anchors
	        if unmap_outputs:
	            num_total_anchors = flat_anchors.size(0)
	            labels = unmap(
	                labels, num_total_anchors, inside_flags,
	                fill=self.num_classes)  # fill bg label
	            label_weights = unmap(label_weights, num_total_anchors,
	                                  inside_flags)
	            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
	            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)

	        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
	                neg_inds, sampling_result)

	    def get_targets(self,
	                    anchor_list,
	                    valid_flag_list,
	                    gt_bboxes_list,
	                    img_metas,
	                    gt_bboxes_ignore_list=None,
	                    gt_labels_list=None,
	                    label_channels=1,
	                    unmap_outputs=True,
	                    return_sampling_results=False):
	        """Compute regression and classification targets for anchors in
	        multiple images.

	        Args:
	            anchor_list (list[list[Tensor]]): Multi level anchors of each
	                image. The outer list indicates images, and the inner list
	                corresponds to feature levels of the image. Each element of
	                the inner list is a tensor of shape (num_anchors, 4).
	            valid_flag_list (list[list[Tensor]]): Multi level valid flags
	                of each image. The outer list indicates images, and the
	                inner list corresponds to feature levels of the image. Each
	                element of the inner list is a tensor of shape
	                (num_anchors, ).
	            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each
	                image.
	            img_metas (list[dict]): Meta info of each image.
	            gt_bboxes_ignore_list (list[Tensor], optional): Ground truth
	                bboxes to be ignored. ``None`` means no ignored boxes for
	                any image.
	            gt_labels_list (list[Tensor], optional): Ground truth labels of
	                each box. ``None`` means no labels for any image.
	            label_channels (int): Channel of label.
	            unmap_outputs (bool): Whether to map outputs back to the
	                original set of anchors.
	            return_sampling_results (bool): Whether to append the list of
	                per-image sampling results to the returned tuple.

	        Returns:
	            tuple | None: ``None`` if any image has no valid anchors.
	            Otherwise a tuple containing learning targets.

	                - labels_list (list[Tensor]): Labels of each level.
	                - label_weights_list (list[Tensor]): Label weights of each
	                  level.
	                - bbox_targets_list (list[Tensor]): BBox targets of each
	                  level.
	                - bbox_weights_list (list[Tensor]): BBox weights of each
	                  level.
	                - num_total_pos (int): Number of positive samples in all
	                  images (each image contributes at least 1).
	                - num_total_neg (int): Number of negative samples in all
	                  images (each image contributes at least 1).
	                - sampling_results_list (list): Only present when
	                  ``return_sampling_results`` is True.

	            additional_returns: This function enables user-defined returns
	                from `self._get_targets_single`. These returns are
	                currently refined to properties at each feature map (i.e.
	                having HxW dimension). The results will be concatenated
	                after the end.
	        """
	        num_imgs = len(img_metas)
	        assert len(anchor_list) == len(valid_flag_list) == num_imgs

	        # anchor number of multi levels
	        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
	        # concat all level anchors to a single tensor
	        concat_anchor_list = []
	        concat_valid_flag_list = []
	        for img_anchors, img_flags in zip(anchor_list, valid_flag_list):
	            assert len(img_anchors) == len(img_flags)
	            concat_anchor_list.append(torch.cat(img_anchors))
	            concat_valid_flag_list.append(torch.cat(img_flags))

	        # compute targets for each image
	        if gt_bboxes_ignore_list is None:
	            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
	        if gt_labels_list is None:
	            gt_labels_list = [None for _ in range(num_imgs)]
	        results = multi_apply(
	            self._get_targets_single,
	            concat_anchor_list,
	            concat_valid_flag_list,
	            gt_bboxes_list,
	            gt_bboxes_ignore_list,
	            gt_labels_list,
	            img_metas,
	            label_channels=label_channels,
	            unmap_outputs=unmap_outputs)
	        (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
	         pos_inds_list, neg_inds_list, sampling_results_list) = results[:7]
	        rest_results = list(results[7:])  # user-added return values
	        # no valid anchors
	        if any(labels is None for labels in all_labels):
	            return None
	        # sampled anchors of all images; each image counts for at least one
	        # sample so the totals are never zero.
	        num_total_pos = sum(max(inds.numel(), 1) for inds in pos_inds_list)
	        num_total_neg = sum(max(inds.numel(), 1) for inds in neg_inds_list)
	        # split targets to a list w.r.t. multiple levels
	        labels_list = images_to_levels(all_labels, num_level_anchors)
	        label_weights_list = images_to_levels(all_label_weights,
	                                              num_level_anchors)
	        bbox_targets_list = images_to_levels(all_bbox_targets,
	                                             num_level_anchors)
	        bbox_weights_list = images_to_levels(all_bbox_weights,
	                                             num_level_anchors)
	        res = (labels_list, label_weights_list, bbox_targets_list,
	               bbox_weights_list, num_total_pos, num_total_neg)
	        if return_sampling_results:
	            res = res + (sampling_results_list, )
	        # user-added return values are regrouped per level as well
	        rest_results = [
	            images_to_levels(r, num_level_anchors) for r in rest_results
	        ]

	        return res + tuple(rest_results)

	    def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights,
	                    bbox_targets, bbox_weights, num_total_samples):
	        """Compute loss of a single scale level.

	        Args:
	            cls_score (Tensor): Box scores for each scale level
	                Has shape (N, num_anchors * num_classes, H, W).
	            bbox_pred (Tensor): Box energies / deltas for each scale
	                level with shape (N, num_anchors * 4, H, W).
	            anchors (Tensor): Box reference for each scale level with shape
	                (N, num_total_anchors, 4).
	            labels (Tensor): Labels of each anchors with shape
	                (N, num_total_anchors).
	            label_weights (Tensor): Label weights of each anchor with shape
	                (N, num_total_anchors)
	            bbox_targets (Tensor): BBox regression targets of each anchor
	                with shape (N, num_total_anchors, 4).
	            bbox_weights (Tensor): BBox regression loss weights of each
	                anchor with shape (N, num_total_anchors, 4).
	            num_total_samples (int): If sampling, num total samples equal to
	                the number of total anchors; Otherwise, it is the number of
	                positive anchors.

	        Returns:
	            tuple: ``(loss_cls, loss_bbox)``, the classification and the
	            regression loss of this level as returned by ``self.loss_cls``
	            and ``self.loss_bbox``.
	        """
	        # classification loss
	        labels = labels.reshape(-1)
	        label_weights = label_weights.reshape(-1)
	        # Move channels last so that each row holds the scores of one anchor.
	        cls_score = cls_score.permute(0, 2, 3,
	                                      1).reshape(-1, self.cls_out_channels)
	        loss_cls = self.loss_cls(
	            cls_score, labels, label_weights, avg_factor=num_total_samples)
	        # regression loss
	        bbox_targets = bbox_targets.reshape(-1, 4)
	        bbox_weights = bbox_weights.reshape(-1, 4)
	        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
	        if self.reg_decoded_bbox:
	            # When the regression loss (e.g. `IouLoss`, `GIouLoss`)
	            # is applied directly on the decoded bounding boxes, it
	            # decodes the already encoded coordinates to absolute format.
	            anchors = anchors.reshape(-1, 4)
	            bbox_pred = self.bbox_coder.decode(anchors, bbox_pred)
	        loss_bbox = self.loss_bbox(
	            bbox_pred,
	            bbox_targets,
	            bbox_weights,
	            avg_factor=num_total_samples)
	        return loss_cls, loss_bbox

	    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
	    def loss(self,
	             cls_scores,
	             bbox_preds,
	             gt_bboxes,
	             gt_labels,
	             img_metas,
	             gt_bboxes_ignore=None):
	        """Compute losses of the head.

	        Args:
	            cls_scores (list[Tensor]): Box scores for each scale level
	                Has shape (N, num_anchors * num_classes, H, W)
	            bbox_preds (list[Tensor]): Box energies / deltas for each scale
	                level with shape (N, num_anchors * 4, H, W)
	            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
	                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
	            gt_labels (list[Tensor]): class indices corresponding to each box
	            img_metas (list[dict]): Meta information of each image, e.g.,
	                image size, scaling factor, etc.
	            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
	                boxes can be ignored when computing the loss. Default: None

	        Returns:
	            dict[str, Tensor] | None: A dictionary of loss components with
	            the keys ``loss_cls`` and ``loss_bbox``, or ``None`` when
	            :meth:`get_targets` reports that there are no valid anchors.
	        """
	        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
	        assert len(featmap_sizes) == self.prior_generator.num_levels

	        device = cls_scores[0].device

	        anchor_list, valid_flag_list = self.get_anchors(
	            featmap_sizes, img_metas, device=device)
	        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
	        cls_reg_targets = self.get_targets(
	            anchor_list,
	            valid_flag_list,
	            gt_bboxes,
	            img_metas,
	            gt_bboxes_ignore_list=gt_bboxes_ignore,
	            gt_labels_list=gt_labels,
	            label_channels=label_channels)
	        if cls_reg_targets is None:
	            return None
	        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
	         num_total_pos, num_total_neg) = cls_reg_targets
	        # With a real sampler the losses are averaged over all sampled
	        # anchors, otherwise over the positives only.
	        num_total_samples = (
	            num_total_pos + num_total_neg if self.sampling else num_total_pos)

	        # anchor number of multi levels
	        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
	        # concat all level anchors of each image to a single tensor, then
	        # regroup them per level to line up with the per-level predictions
	        concat_anchor_list = [
	            torch.cat(img_anchors) for img_anchors in anchor_list
	        ]
	        all_anchor_list = images_to_levels(concat_anchor_list,
	                                           num_level_anchors)

	        losses_cls, losses_bbox = multi_apply(
	            self.loss_single,
	            cls_scores,
	            bbox_preds,
	            all_anchor_list,
	            labels_list,
	            label_weights_list,
	            bbox_targets_list,
	            bbox_weights_list,
	            num_total_samples=num_total_samples)
	        return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)

	    def aug_test(self, feats, img_metas, rescale=False):
	        """Test function with test time augmentation.

	        Delegates to ``self.aug_test_bboxes``.

	        Args:
	            feats (list[Tensor]): the outer list indicates test-time
	                augmentations and inner Tensor should have a shape NxCxHxW,
	                which contains features for all images in the batch.
	            img_metas (list[list[dict]]): the outer list indicates test-time
	                augs (multiscale, flip, etc.) and the inner list indicates
	                images in a batch. each dict has image information.
	            rescale (bool, optional): Whether to rescale the results.
	                Defaults to False.

	        Returns:
	            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
	                The first item is ``bboxes`` with shape (n, 5), where
	                5 represent (tl_x, tl_y, br_x, br_y, score).
	                The shape of the second tensor in the tuple is ``labels``
	                with shape (n,), The length of list should always be 1.
	        """
	        return self.aug_test_bboxes(feats, img_metas, rescale=rescale)