# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.runner import BaseModule, auto_fp16, force_fp32
from torch.nn.modules.utils import _pair

from mmdet.core import build_bbox_coder, multi_apply, multiclass_nms
from mmdet.models.builder import HEADS, build_loss
from mmdet.models.losses import accuracy
from mmdet.models.utils import build_linear_layer


@HEADS.register_module()
class BBoxHead(BaseModule):
"""Simplest RoI head, with only two fc layers for classification and
regression respectively."""

    def __init__(self,
with_avg_pool=False,
with_cls=True,
with_reg=True,
roi_feat_size=7,
in_channels=256,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
clip_border=True,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
reg_decoded_bbox=False,
reg_predictor_cfg=dict(type='Linear'),
cls_predictor_cfg=dict(type='Linear'),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss', beta=1.0, loss_weight=1.0),
init_cfg=None):
super(BBoxHead, self).__init__(init_cfg)
assert with_cls or with_reg
self.with_avg_pool = with_avg_pool
self.with_cls = with_cls
self.with_reg = with_reg
self.roi_feat_size = _pair(roi_feat_size)
self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
self.in_channels = in_channels
self.num_classes = num_classes
self.reg_class_agnostic = reg_class_agnostic
self.reg_decoded_bbox = reg_decoded_bbox
self.reg_predictor_cfg = reg_predictor_cfg
self.cls_predictor_cfg = cls_predictor_cfg
self.fp16_enabled = False
self.bbox_coder = build_bbox_coder(bbox_coder)
self.loss_cls = build_loss(loss_cls)
self.loss_bbox = build_loss(loss_bbox)
in_channels = self.in_channels
if self.with_avg_pool:
self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
else:
in_channels *= self.roi_feat_area
if self.with_cls:
# need to add background class
if self.custom_cls_channels:
cls_channels = self.loss_cls.get_cls_channels(self.num_classes)
else:
cls_channels = num_classes + 1
self.fc_cls = build_linear_layer(
self.cls_predictor_cfg,
in_features=in_channels,
out_features=cls_channels)
if self.with_reg:
out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
self.fc_reg = build_linear_layer(
self.reg_predictor_cfg,
in_features=in_channels,
out_features=out_dim_reg)
self.debug_imgs = None
if init_cfg is None:
self.init_cfg = []
if self.with_cls:
self.init_cfg += [
dict(
type='Normal', std=0.01, override=dict(name='fc_cls'))
]
if self.with_reg:
self.init_cfg += [
dict(
type='Normal', std=0.001, override=dict(name='fc_reg'))
]

    @property
    def custom_cls_channels(self):
        """bool: Whether the classification loss defines its own number of
        prediction channels."""
        return getattr(self.loss_cls, 'custom_cls_channels', False)

    @property
    def custom_activation(self):
        """bool: Whether the classification loss defines its own activation
        function."""
        return getattr(self.loss_cls, 'custom_activation', False)

    @property
    def custom_accuracy(self):
        """bool: Whether the classification loss defines its own accuracy
        metric."""
        return getattr(self.loss_cls, 'custom_accuracy', False)

    @auto_fp16()
def forward(self, x):
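        """Forward features from the upstream network.

        Args:
            x (Tensor): RoI features with shape (num_rois, in_channels,
                roi_feat_size, roi_feat_size) when `with_avg_pool` is
                True; otherwise the features are expected to be already
                flattened to (num_rois, in_channels * roi_feat_area).

        Returns:
            tuple[Tensor, Tensor]: `cls_score` with shape
                (num_rois, num_classes + 1) and `bbox_pred` with shape
                (num_rois, 4) or (num_rois, num_classes * 4). Either may
                be None when the corresponding branch is disabled.

        Example:
            >>> # A minimal sketch assuming average pooling of the
            >>> # default 7x7 RoI features.
            >>> self = BBoxHead(
            ...     with_avg_pool=True, in_channels=256, num_classes=3)
            >>> x = torch.rand(2, 256, 7, 7)
            >>> cls_score, bbox_pred = self.forward(x)
            >>> cls_score.shape, bbox_pred.shape
            (torch.Size([2, 4]), torch.Size([2, 12]))
        """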
if self.with_avg_pool:
if x.numel() > 0:
x = self.avg_pool(x)
x = x.view(x.size(0), -1)
else:
# avg_pool does not support empty tensor,
                # so use torch.mean instead
x = torch.mean(x, dim=(-1, -2))
cls_score = self.fc_cls(x) if self.with_cls else None
bbox_pred = self.fc_reg(x) if self.with_reg else None
return cls_score, bbox_pred

    def _get_target_single(self, pos_bboxes, neg_bboxes, pos_gt_bboxes,
pos_gt_labels, cfg):
"""Calculate the ground truth for proposals in the single image
according to the sampling results.
Args:
pos_bboxes (Tensor): Contains all the positive boxes,
has shape (num_pos, 4), the last dimension 4
represents [tl_x, tl_y, br_x, br_y].
neg_bboxes (Tensor): Contains all the negative boxes,
has shape (num_neg, 4), the last dimension 4
represents [tl_x, tl_y, br_x, br_y].
pos_gt_bboxes (Tensor): Contains gt_boxes for
all positive samples, has shape (num_pos, 4),
the last dimension 4
represents [tl_x, tl_y, br_x, br_y].
pos_gt_labels (Tensor): Contains gt_labels for
all positive samples, has shape (num_pos, ).
cfg (obj:`ConfigDict`): `train_cfg` of R-CNN.
Returns:
Tuple[Tensor]: Ground truth for proposals
in a single image. Containing the following Tensors:
                - labels (Tensor): GT labels for all proposals, has
                  shape (num_proposals,).
                - label_weights (Tensor): Label weights for all
                  proposals, has shape (num_proposals,).
                - bbox_targets (Tensor): Regression targets for all
                  proposals, has shape (num_proposals, 4), the
                  last dimension 4 represents [tl_x, tl_y, br_x, br_y].
                - bbox_weights (Tensor): Regression weights for all
proposals, has shape (num_proposals, 4).
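
        Example:
            >>> # A minimal sketch with hand-made boxes; it assumes the
            >>> # default `DeltaXYWHBBoxCoder` and uses a plain
            >>> # `mmcv.Config` standing in for the R-CNN train_cfg.
            >>> import mmcv
            >>> self = BBoxHead(num_classes=3)
            >>> pos_bboxes = torch.tensor([[0., 0., 10., 10.]])
            >>> neg_bboxes = torch.tensor([[10., 10., 20., 20.]])
            >>> pos_gt_bboxes = torch.tensor([[1., 1., 11., 11.]])
            >>> pos_gt_labels = torch.tensor([1])
            >>> cfg = mmcv.Config(dict(pos_weight=-1))
            >>> targets = self._get_target_single(
            ...     pos_bboxes, neg_bboxes, pos_gt_bboxes,
            ...     pos_gt_labels, cfg)
            >>> [t.shape for t in targets]
            [torch.Size([2]), torch.Size([2]), torch.Size([2, 4]), torch.Size([2, 4])]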
"""
num_pos = pos_bboxes.size(0)
num_neg = neg_bboxes.size(0)
num_samples = num_pos + num_neg
        # The original implementation used new_zeros since BG was labeled 0.
        # Now we use new_full with BG cat_id = num_classes and
        # FG cat_id in [0, num_classes - 1].
labels = pos_bboxes.new_full((num_samples, ),
self.num_classes,
dtype=torch.long)
label_weights = pos_bboxes.new_zeros(num_samples)
bbox_targets = pos_bboxes.new_zeros(num_samples, 4)
bbox_weights = pos_bboxes.new_zeros(num_samples, 4)
if num_pos > 0:
labels[:num_pos] = pos_gt_labels
pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
label_weights[:num_pos] = pos_weight
if not self.reg_decoded_bbox:
pos_bbox_targets = self.bbox_coder.encode(
pos_bboxes, pos_gt_bboxes)
else:
# When the regression loss (e.g. `IouLoss`, `GIouLoss`)
# is applied directly on the decoded bounding boxes, both
# the predicted boxes and regression targets should be with
# absolute coordinate format.
pos_bbox_targets = pos_gt_bboxes
bbox_targets[:num_pos, :] = pos_bbox_targets
bbox_weights[:num_pos, :] = 1
if num_neg > 0:
label_weights[-num_neg:] = 1.0
return labels, label_weights, bbox_targets, bbox_weights

    def get_targets(self,
sampling_results,
gt_bboxes,
gt_labels,
rcnn_train_cfg,
concat=True):
"""Calculate the ground truth for all samples in a batch according to
the sampling_results.

        It maps `_get_target_single` over the per-image sampling results
        with `multi_apply` and optionally concatenates the per-image
        targets into batch-level tensors.

Args:
sampling_results (List[obj:SamplingResults]): Assign results of
all images in a batch after sampling.
gt_bboxes (list[Tensor]): Gt_bboxes of all images in a batch,
each tensor has shape (num_gt, 4), the last dimension 4
represents [tl_x, tl_y, br_x, br_y].
gt_labels (list[Tensor]): Gt_labels of all images in a batch,
each tensor has shape (num_gt,).
rcnn_train_cfg (obj:ConfigDict): `train_cfg` of RCNN.
concat (bool): Whether to concatenate the results of all
the images in a single batch.
Returns:
            Tuple[Tensor]: Ground truth for proposals in a batch.
            Containing the following list of Tensors:

- labels (list[Tensor],Tensor): Gt_labels for all
proposals in a batch, each tensor in list has
shape (num_proposals,) when `concat=False`, otherwise
just a single tensor has shape (num_all_proposals,).
- label_weights (list[Tensor]): Labels_weights for
all proposals in a batch, each tensor in list has
shape (num_proposals,) when `concat=False`, otherwise
just a single tensor has shape (num_all_proposals,).
- bbox_targets (list[Tensor],Tensor): Regression target
for all proposals in a batch, each tensor in list
has shape (num_proposals, 4) when `concat=False`,
otherwise just a single tensor has shape
(num_all_proposals, 4), the last dimension 4 represents
[tl_x, tl_y, br_x, br_y].
                - bbox_weights (list[Tensor],Tensor): Regression weights for
all proposals in a batch, each tensor in list has shape
(num_proposals, 4) when `concat=False`, otherwise just a
single tensor has shape (num_all_proposals, 4).
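
        Example:
            >>> # A minimal sketch: the SamplingResults object is mocked
            >>> # with a bare namespace providing only the four attributes
            >>> # this method reads; gt_bboxes and gt_labels are not used
            >>> # by this base implementation, so None is passed.
            >>> from types import SimpleNamespace
            >>> import mmcv
            >>> self = BBoxHead(num_classes=3)
            >>> res = SimpleNamespace(
            ...     pos_bboxes=torch.tensor([[0., 0., 10., 10.]]),
            ...     neg_bboxes=torch.tensor([[10., 10., 20., 20.]]),
            ...     pos_gt_bboxes=torch.tensor([[1., 1., 11., 11.]]),
            ...     pos_gt_labels=torch.tensor([2]))
            >>> cfg = mmcv.Config(dict(pos_weight=-1))
            >>> labels, label_weights, bbox_targets, bbox_weights = (
            ...     self.get_targets([res], None, None, cfg))
            >>> labels.shape, bbox_targets.shape
            (torch.Size([2]), torch.Size([2, 4]))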
"""
pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
neg_bboxes_list = [res.neg_bboxes for res in sampling_results]
pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
pos_gt_labels_list = [res.pos_gt_labels for res in sampling_results]
labels, label_weights, bbox_targets, bbox_weights = multi_apply(
self._get_target_single,
pos_bboxes_list,
neg_bboxes_list,
pos_gt_bboxes_list,
pos_gt_labels_list,
cfg=rcnn_train_cfg)
if concat:
labels = torch.cat(labels, 0)
label_weights = torch.cat(label_weights, 0)
bbox_targets = torch.cat(bbox_targets, 0)
bbox_weights = torch.cat(bbox_weights, 0)
return labels, label_weights, bbox_targets, bbox_weights

    @force_fp32(apply_to=('cls_score', 'bbox_pred'))
def loss(self,
cls_score,
bbox_pred,
rois,
labels,
label_weights,
bbox_targets,
bbox_weights,
reduction_override=None):
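        """Compute the classification and regression losses.

        Args:
            cls_score (Tensor): Classification scores with shape
                (num_samples, num_classes + 1), or None.
            bbox_pred (Tensor): Box deltas with shape (num_samples, 4)
                or (num_samples, num_classes * 4), or None.
            rois (Tensor): RoIs with shape (num_samples, 5), only used
                to decode `bbox_pred` when `reg_decoded_bbox` is True.
            labels (Tensor): Class labels with shape (num_samples, ).
            label_weights (Tensor): Classification weights with shape
                (num_samples, ).
            bbox_targets (Tensor): Regression targets with shape
                (num_samples, 4).
            bbox_weights (Tensor): Regression weights with shape
                (num_samples, 4).
            reduction_override (str, optional): Override the configured
                loss reduction. Default: None.

        Returns:
            dict[str, Tensor]: `loss_cls`, `acc` and `loss_bbox` terms,
                depending on which inputs are not None.

        Example:
            >>> # A minimal sketch with a single positive sample and the
            >>> # default losses.
            >>> self = BBoxHead(num_classes=3)
            >>> cls_score = torch.rand(1, 4)
            >>> bbox_pred = torch.rand(1, 12)
            >>> rois = torch.tensor([[0., 0., 0., 10., 10.]])
            >>> labels = torch.tensor([1])
            >>> label_weights = torch.ones(1)
            >>> bbox_targets = torch.rand(1, 4)
            >>> bbox_weights = torch.ones(1, 4)
            >>> losses = self.loss(cls_score, bbox_pred, rois, labels,
            ...                    label_weights, bbox_targets,
            ...                    bbox_weights)
            >>> sorted(losses.keys())
            ['acc', 'loss_bbox', 'loss_cls']
        """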
losses = dict()
if cls_score is not None:
avg_factor = max(torch.sum(label_weights > 0).float().item(), 1.)
if cls_score.numel() > 0:
loss_cls_ = self.loss_cls(
cls_score,
labels,
label_weights,
avg_factor=avg_factor,
reduction_override=reduction_override)
if isinstance(loss_cls_, dict):
losses.update(loss_cls_)
else:
losses['loss_cls'] = loss_cls_
if self.custom_activation:
acc_ = self.loss_cls.get_accuracy(cls_score, labels)
losses.update(acc_)
else:
losses['acc'] = accuracy(cls_score, labels)
if bbox_pred is not None:
bg_class_ind = self.num_classes
# 0~self.num_classes-1 are FG, self.num_classes is BG
pos_inds = (labels >= 0) & (labels < bg_class_ind)
# do not perform bounding box regression for BG anymore.
if pos_inds.any():
if self.reg_decoded_bbox:
# When the regression loss (e.g. `IouLoss`,
# `GIouLoss`, `DIouLoss`) is applied directly on
# the decoded bounding boxes, it decodes the
# already encoded coordinates to absolute format.
bbox_pred = self.bbox_coder.decode(rois[:, 1:], bbox_pred)
if self.reg_class_agnostic:
pos_bbox_pred = bbox_pred.view(
bbox_pred.size(0), 4)[pos_inds.type(torch.bool)]
else:
pos_bbox_pred = bbox_pred.view(
bbox_pred.size(0), -1,
4)[pos_inds.type(torch.bool),
labels[pos_inds.type(torch.bool)]]
losses['loss_bbox'] = self.loss_bbox(
pos_bbox_pred,
bbox_targets[pos_inds.type(torch.bool)],
bbox_weights[pos_inds.type(torch.bool)],
avg_factor=bbox_targets.size(0),
reduction_override=reduction_override)
else:
losses['loss_bbox'] = bbox_pred[pos_inds].sum()
return losses

    @force_fp32(apply_to=('cls_score', 'bbox_pred'))
def get_bboxes(self,
rois,
cls_score,
bbox_pred,
img_shape,
scale_factor,
rescale=False,
cfg=None):
"""Transform network output for a batch into bbox predictions.
Args:
            rois (Tensor): Boxes to be transformed, with shape
                (num_boxes, 5), where the last dimension is arranged as
                (batch_index, x1, y1, x2, y2).
            cls_score (Tensor): Box scores, with shape
                (num_boxes, num_classes + 1).
            bbox_pred (Tensor, optional): Box energies / deltas, with
                shape (num_boxes, num_classes * 4).
            img_shape (Sequence[int], optional): Maximum bounds for boxes,
                specifies (H, W, C) or (H, W).
            scale_factor (ndarray): Scale factor of the image, arranged as
                (w_scale, h_scale, w_scale, h_scale).
rescale (bool): If True, return boxes in original image space.
Default: False.
cfg (obj:`ConfigDict`): `test_cfg` of Bbox Head. Default: None
Returns:
tuple[Tensor, Tensor]:
First tensor is `det_bboxes`, has the shape
(num_boxes, 5) and last
dimension 5 represent (tl_x, tl_y, br_x, br_y, score).
Second tensor is the labels with shape (num_boxes, ).
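
        Example:
            >>> # A minimal sketch with random scores and `cfg=None`, so
            >>> # the raw (bboxes, scores) pair is returned without NMS.
            >>> self = BBoxHead(num_classes=3)
            >>> rois = torch.tensor([[0., 10., 10., 40., 40.]])
            >>> cls_score = torch.rand(1, 4)
            >>> bbox_pred = torch.rand(1, 12)
            >>> bboxes, scores = self.get_bboxes(
            ...     rois, cls_score, bbox_pred, img_shape=(128, 128),
            ...     scale_factor=1.0, cfg=None)
            >>> bboxes.shape, scores.shape
            (torch.Size([1, 12]), torch.Size([1, 4]))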
"""
        # some losses (e.g. Seesaw loss) may have custom activation
if self.custom_cls_channels:
scores = self.loss_cls.get_activation(cls_score)
else:
scores = F.softmax(
cls_score, dim=-1) if cls_score is not None else None
        # bbox_pred would be None in some detectors when with_reg is False,
        # e.g. Grid R-CNN.
if bbox_pred is not None:
bboxes = self.bbox_coder.decode(
rois[..., 1:], bbox_pred, max_shape=img_shape)
else:
bboxes = rois[:, 1:].clone()
if img_shape is not None:
bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1])
bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0])
if rescale and bboxes.size(0) > 0:
scale_factor = bboxes.new_tensor(scale_factor)
bboxes = (bboxes.view(bboxes.size(0), -1, 4) / scale_factor).view(
bboxes.size()[0], -1)
if cfg is None:
return bboxes, scores
else:
det_bboxes, det_labels = multiclass_nms(bboxes, scores,
cfg.score_thr, cfg.nms,
cfg.max_per_img)
return det_bboxes, det_labels

    @force_fp32(apply_to=('bbox_preds', ))
def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):
"""Refine bboxes during training.
Args:
            rois (Tensor): Shape (n*bs, 5), where n is the number of images
                per GPU and bs is the number of sampled RoIs per image.
                The first column is the image id and the next 4 columns
                are x1, y1, x2, y2.
labels (Tensor): Shape (n*bs, ).
bbox_preds (Tensor): Shape (n*bs, 4) or (n*bs, 4*#class).
pos_is_gts (list[Tensor]): Flags indicating if each positive bbox
is a gt bbox.
img_metas (list[dict]): Meta info of each image.
Returns:
list[Tensor]: Refined bboxes of each image in a mini-batch.
Example:
>>> # xdoctest: +REQUIRES(module:kwarray)
>>> import kwarray
>>> import numpy as np
>>> from mmdet.core.bbox.demodata import random_boxes
>>> self = BBoxHead(reg_class_agnostic=True)
>>> n_roi = 2
>>> n_img = 4
>>> scale = 512
>>> rng = np.random.RandomState(0)
>>> img_metas = [{'img_shape': (scale, scale)}
... for _ in range(n_img)]
>>> # Create rois in the expected format
>>> roi_boxes = random_boxes(n_roi, scale=scale, rng=rng)
>>> img_ids = torch.randint(0, n_img, (n_roi,))
>>> img_ids = img_ids.float()
>>> rois = torch.cat([img_ids[:, None], roi_boxes], dim=1)
>>> # Create other args
>>> labels = torch.randint(0, 2, (n_roi,)).long()
>>> bbox_preds = random_boxes(n_roi, scale=scale, rng=rng)
>>> # For each image, pretend random positive boxes are gts
            >>> is_label_pos = (labels.numpy() > 0).astype(int)
>>> lbl_per_img = kwarray.group_items(is_label_pos,
... img_ids.numpy())
>>> pos_per_img = [sum(lbl_per_img.get(gid, []))
... for gid in range(n_img)]
>>> pos_is_gts = [
>>> torch.randint(0, 2, (npos,)).byte().sort(
>>> descending=True)[0]
>>> for npos in pos_per_img
>>> ]
>>> bboxes_list = self.refine_bboxes(rois, labels, bbox_preds,
>>> pos_is_gts, img_metas)
>>> print(bboxes_list)
"""
img_ids = rois[:, 0].long().unique(sorted=True)
assert img_ids.numel() <= len(img_metas)
bboxes_list = []
for i in range(len(img_metas)):
inds = torch.nonzero(
rois[:, 0] == i, as_tuple=False).squeeze(dim=1)
num_rois = inds.numel()
bboxes_ = rois[inds, 1:]
label_ = labels[inds]
bbox_pred_ = bbox_preds[inds]
img_meta_ = img_metas[i]
pos_is_gts_ = pos_is_gts[i]
bboxes = self.regress_by_class(bboxes_, label_, bbox_pred_,
img_meta_)
# filter gt bboxes
pos_keep = 1 - pos_is_gts_
keep_inds = pos_is_gts_.new_ones(num_rois)
keep_inds[:len(pos_is_gts_)] = pos_keep
bboxes_list.append(bboxes[keep_inds.type(torch.bool)])
return bboxes_list

    @force_fp32(apply_to=('bbox_pred', ))
def regress_by_class(self, rois, label, bbox_pred, img_meta):
"""Regress the bbox for the predicted class. Used in Cascade R-CNN.
Args:
rois (Tensor): Rois from `rpn_head` or last stage
`bbox_head`, has shape (num_proposals, 4) or
(num_proposals, 5).
label (Tensor): Only used when `self.reg_class_agnostic`
is False, has shape (num_proposals, ).
bbox_pred (Tensor): Regression prediction of
current stage `bbox_head`. When `self.reg_class_agnostic`
is False, it has shape (n, num_classes * 4), otherwise
it has shape (n, 4).
img_meta (dict): Image meta info.
Returns:
Tensor: Regressed bboxes, the same shape as input rois.
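
        Example:
            >>> # A minimal sketch with class-aware regression, so the
            >>> # four deltas of the predicted class are gathered before
            >>> # decoding.
            >>> self = BBoxHead(num_classes=3, reg_class_agnostic=False)
            >>> rois = torch.tensor([[10., 10., 40., 40.]])
            >>> label = torch.tensor([1])
            >>> bbox_pred = torch.rand(1, 12)
            >>> img_meta = {'img_shape': (128, 128, 3)}
            >>> new_rois = self.regress_by_class(rois, label, bbox_pred,
            ...                                  img_meta)
            >>> new_rois.shape
            torch.Size([1, 4])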
"""
assert rois.size(1) == 4 or rois.size(1) == 5, repr(rois.shape)
if not self.reg_class_agnostic:
label = label * 4
inds = torch.stack((label, label + 1, label + 2, label + 3), 1)
bbox_pred = torch.gather(bbox_pred, 1, inds)
assert bbox_pred.size(1) == 4
max_shape = img_meta['img_shape']
if rois.size(1) == 4:
new_rois = self.bbox_coder.decode(
rois, bbox_pred, max_shape=max_shape)
else:
bboxes = self.bbox_coder.decode(
rois[:, 1:], bbox_pred, max_shape=max_shape)
new_rois = torch.cat((rois[:, [0]], bboxes), dim=1)
return new_rois

    def onnx_export(self,
rois,
cls_score,
bbox_pred,
img_shape,
cfg=None,
**kwargs):
"""Transform network output for a batch into bbox predictions.
Args:
rois (Tensor): Boxes to be transformed.
Has shape (B, num_boxes, 5)
            cls_score (Tensor): Box scores with shape
                (B, num_boxes, num_classes + 1), where the extra channel
                is the background class.
            bbox_pred (Tensor, optional): Box energies / deltas with
                shape (B, num_boxes, num_classes * 4).
img_shape (torch.Tensor): Shape of image.
cfg (obj:`ConfigDict`): `test_cfg` of Bbox Head. Default: None
Returns:
tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]
and class labels of shape [N, num_det].
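
        Example:
            >>> # xdoctest: +SKIP
            >>> # A sketch of the deployment-time call; it assumes this
            >>> # runs while tracing with torch.onnx.export and that
            >>> # `test_cfg` (hypothetical here) defines `nms`,
            >>> # `score_thr` and `max_per_img`.
            >>> self = BBoxHead(num_classes=3)
            >>> rois = torch.rand(1, 100, 5)
            >>> cls_score = torch.rand(1, 100, 4)
            >>> bbox_pred = torch.rand(1, 100, 12)
            >>> img_shape = torch.tensor([320., 320.])
            >>> dets, labels = self.onnx_export(
            ...     rois, cls_score, bbox_pred, img_shape, cfg=test_cfg)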
"""
assert rois.ndim == 3, 'Only support export two stage ' \
'model to ONNX ' \
'with batch dimension. '
if self.custom_cls_channels:
scores = self.loss_cls.get_activation(cls_score)
else:
scores = F.softmax(
cls_score, dim=-1) if cls_score is not None else None
if bbox_pred is not None:
bboxes = self.bbox_coder.decode(
rois[..., 1:], bbox_pred, max_shape=img_shape)
else:
bboxes = rois[..., 1:].clone()
if img_shape is not None:
max_shape = bboxes.new_tensor(img_shape)[..., :2]
min_xy = bboxes.new_tensor(0)
max_xy = torch.cat(
[max_shape] * 2, dim=-1).flip(-1).unsqueeze(-2)
bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)
bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)
# Replace multiclass_nms with ONNX::NonMaxSuppression in deployment
from mmdet.core.export import add_dummy_nms_for_onnx
max_output_boxes_per_class = cfg.nms.get('max_output_boxes_per_class',
cfg.max_per_img)
iou_threshold = cfg.nms.get('iou_threshold', 0.5)
score_threshold = cfg.score_thr
nms_pre = cfg.get('deploy_nms_pre', -1)
scores = scores[..., :self.num_classes]
if self.reg_class_agnostic:
return add_dummy_nms_for_onnx(
bboxes,
scores,
max_output_boxes_per_class,
iou_threshold,
score_threshold,
pre_top_k=nms_pre,
after_top_k=cfg.max_per_img)
else:
batch_size = scores.shape[0]
labels = torch.arange(
self.num_classes, dtype=torch.long).to(scores.device)
labels = labels.view(1, 1, -1).expand_as(scores)
labels = labels.reshape(batch_size, -1)
scores = scores.reshape(batch_size, -1)
bboxes = bboxes.reshape(batch_size, -1, 4)
max_size = torch.max(img_shape)
# Offset bboxes of each class so that bboxes of different labels
# do not overlap.
offsets = (labels * max_size + 1).unsqueeze(2)
bboxes_for_nms = bboxes + offsets
batch_dets, labels = add_dummy_nms_for_onnx(
bboxes_for_nms,
scores.unsqueeze(2),
max_output_boxes_per_class,
iou_threshold,
score_threshold,
pre_top_k=nms_pre,
after_top_k=cfg.max_per_img,
labels=labels)
# Offset the bboxes back after dummy nms.
offsets = (labels * max_size + 1).unsqueeze(2)
# Indexing + inplace operation fails with dynamic shape in ONNX
# original style: batch_dets[..., :4] -= offsets
bboxes, scores = batch_dets[..., 0:4], batch_dets[..., 4:5]
bboxes -= offsets
batch_dets = torch.cat([bboxes, scores], dim=2)
return batch_dets, labels