Spaces:
Runtime error
Runtime error
# Copyright (c) OpenMMLab. All rights reserved.
import copy | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from mmcv.cnn import ConvModule | |
from mmcv.ops import batched_nms | |
from ..builder import HEADS | |
from .anchor_head import AnchorHead | |
class RPNHead(AnchorHead):
    """RPN head.

    Args:
        in_channels (int): Number of channels in the input feature map.
        init_cfg (dict or list[dict], optional): Initialization config dict.
        num_convs (int): Number of convolution layers in the head. Default 1.
    """  # noqa: W605

    def __init__(self,
                 in_channels,
                 init_cfg=dict(type='Normal', layer='Conv2d', std=0.01),
                 num_convs=1,
                 **kwargs):
        # ``num_convs`` is stored before the parent constructor runs;
        # ``_init_layers`` reads it (presumably the parent constructor
        # invokes ``_init_layers`` -- confirm in ``AnchorHead``).
        self.num_convs = num_convs
        # The leading positional ``1`` is forwarded unchanged to the parent
        # (presumably the number of classes -- confirm in ``AnchorHead``).
        super(RPNHead, self).__init__(
            1, in_channels, init_cfg=init_cfg, **kwargs)

    def _init_layers(self):
        """Initialize layers of the head.

        Builds ``rpn_conv`` (a single 3x3 ``nn.Conv2d`` or, when
        ``num_convs > 1``, a ``nn.Sequential`` of 3x3 ``ConvModule``),
        followed by the 1x1 classification (``rpn_cls``) and regression
        (``rpn_reg``) convolutions.
        """
        if self.num_convs > 1:
            rpn_convs = []
            for i in range(self.num_convs):
                # Only the first conv sees the input feature channels.
                in_channels = (
                    self.in_channels if i == 0 else self.feat_channels)
                # use ``inplace=False`` to avoid error: one of the variables
                # needed for gradient computation has been modified by an
                # inplace operation.
                rpn_convs.append(
                    ConvModule(
                        in_channels,
                        self.feat_channels,
                        3,
                        padding=1,
                        inplace=False))
            self.rpn_conv = nn.Sequential(*rpn_convs)
        else:
            self.rpn_conv = nn.Conv2d(
                self.in_channels, self.feat_channels, 3, padding=1)
        self.rpn_cls = nn.Conv2d(self.feat_channels,
                                 self.num_base_priors * self.cls_out_channels,
                                 1)
        self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_base_priors * 4,
                                 1)

    def forward_single(self, x):
        """Forward feature map of a single scale level.

        Args:
            x (Tensor): Feature map of one scale level.

        Returns:
            tuple[Tensor, Tensor]: ``(rpn_cls_score, rpn_bbox_pred)``, the
            outputs of ``rpn_cls`` and ``rpn_reg`` applied to the shared
            ``rpn_conv`` + ReLU feature.
        """
        x = self.rpn_conv(x)
        x = F.relu(x, inplace=False)
        rpn_cls_score = self.rpn_cls(x)
        rpn_bbox_pred = self.rpn_reg(x)
        return rpn_cls_score, rpn_bbox_pred

    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                Has shape (N, num_anchors * num_classes, H, W)
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W)
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: A dictionary of loss components with keys
            ``loss_rpn_cls`` and ``loss_rpn_bbox``.
        """
        # The fourth positional argument of the parent ``loss`` is passed as
        # ``None`` (presumably the gt labels, which RPN does not use --
        # confirm against ``AnchorHead.loss``).
        losses = super(RPNHead, self).loss(
            cls_scores,
            bbox_preds,
            gt_bboxes,
            None,
            img_metas,
            gt_bboxes_ignore=gt_bboxes_ignore)
        # Re-key the parent's losses with an ``rpn`` prefix.
        return dict(
            loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox'])

    def _get_bboxes_single(self,
                           cls_score_list,
                           bbox_pred_list,
                           score_factor_list,
                           mlvl_anchors,
                           img_meta,
                           cfg,
                           rescale=False,
                           with_nms=True,
                           **kwargs):
        """Transform outputs of a single image into bbox predictions.

        Args:
            cls_score_list (list[Tensor]): Box scores from all scale
                levels of a single image, each item has shape
                (num_anchors * num_classes, H, W).
            bbox_pred_list (list[Tensor]): Box energies / deltas from
                all scale levels of a single image, each item has
                shape (num_anchors * 4, H, W).
            score_factor_list (list[Tensor]): Score factor from all scale
                levels of a single image. RPN head does not need this value.
            mlvl_anchors (list[Tensor]): Anchors of all scale level
                each item has shape (num_anchors, 4).
            img_meta (dict): Image meta info. Only ``img_shape`` is read.
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): Accepted for interface compatibility; not read
                by this implementation. Default: False.
            with_nms (bool): Accepted for interface compatibility; not read
                by this implementation, which always hands the candidates
                to ``_bbox_post_process``. Default: True.

        Returns:
            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
            are bounding box positions (tl_x, tl_y, br_x, br_y) and the
            5-th column is a score between 0 and 1.
        """
        cfg = self.test_cfg if cfg is None else cfg
        cfg = copy.deepcopy(cfg)
        img_shape = img_meta['img_shape']

        # bboxes from different level should be independent during NMS,
        # level_ids are used as labels for batched NMS to separate them
        level_ids = []
        mlvl_scores = []
        mlvl_bbox_preds = []
        mlvl_valid_anchors = []
        nms_pre = cfg.get('nms_pre', -1)

        for level_idx, rpn_cls_score in enumerate(cls_score_list):
            # Sibling lists are indexed (not zipped) so a length mismatch
            # still raises instead of being silently truncated.
            rpn_bbox_pred = bbox_pred_list[level_idx]
            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]

            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
            if self.use_sigmoid_cls:
                rpn_cls_score = rpn_cls_score.reshape(-1)
                scores = rpn_cls_score.sigmoid()
            else:
                rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                # We set FG labels to [0, num_class-1] and BG label to
                # num_class in RPN head since mmdet v2.5, which is unified to
                # be consistent with other head since mmdet v2.0. In mmdet v2.0
                # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
                scores = rpn_cls_score.softmax(dim=1)[:, 0]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            anchors = mlvl_anchors[level_idx]

            # Keep only the ``nms_pre`` best-scoring candidates of this level.
            if 0 < nms_pre < scores.shape[0]:
                # sort is faster than topk
                ranked_scores, rank_inds = scores.sort(descending=True)
                topk_inds = rank_inds[:nms_pre]
                scores = ranked_scores[:nms_pre]
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]

            mlvl_scores.append(scores)
            mlvl_bbox_preds.append(rpn_bbox_pred)
            mlvl_valid_anchors.append(anchors)
            level_ids.append(
                scores.new_full((scores.size(0), ),
                                level_idx,
                                dtype=torch.long))

        return self._bbox_post_process(mlvl_scores, mlvl_bbox_preds,
                                       mlvl_valid_anchors, level_ids, cfg,
                                       img_shape)

    def _bbox_post_process(self, mlvl_scores, mlvl_bboxes, mlvl_valid_anchors,
                           level_ids, cfg, img_shape, **kwargs):
        """bbox post-processing method.

        Decode the predictions against their anchors, drop boxes that are
        too small, then do the nms operation for bboxes in same level.

        Args:
            mlvl_scores (list[Tensor]): Box scores from all scale
                levels of a single image, each item has shape
                (num_bboxes, ).
            mlvl_bboxes (list[Tensor]): Box regression outputs from all
                scale levels of a single image, each item has shape
                (num_bboxes, 4). They are not decoded yet; this method
                decodes them with ``self.bbox_coder``.
            mlvl_valid_anchors (list[Tensor]): Anchors of all scale level
                each item has shape (num_bboxes, 4).
            level_ids (list[Tensor]): Indexes from all scale levels of a
                single image, each item has shape (num_bboxes, ).
            cfg (mmcv.Config): Test / postprocessing configuration. Must
                provide ``min_bbox_size``, ``nms`` and ``max_per_img``;
                ``None`` is not handled here.
            img_shape (tuple(int)): The shape of model's input image.

        Returns:
            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
            are bounding box positions (tl_x, tl_y, br_x, br_y) and the
            5-th column is a score between 0 and 1.
        """
        scores = torch.cat(mlvl_scores)
        anchors = torch.cat(mlvl_valid_anchors)
        rpn_bbox_pred = torch.cat(mlvl_bboxes)
        proposals = self.bbox_coder.decode(
            anchors, rpn_bbox_pred, max_shape=img_shape)
        ids = torch.cat(level_ids)

        # A negative ``min_bbox_size`` disables the size filter.
        if cfg.min_bbox_size >= 0:
            w = proposals[:, 2] - proposals[:, 0]
            h = proposals[:, 3] - proposals[:, 1]
            valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)
            if not valid_mask.all():
                proposals = proposals[valid_mask]
                scores = scores[valid_mask]
                ids = ids[valid_mask]

        # Nothing survived: return an empty (0, 5) result without calling NMS.
        if proposals.numel() == 0:
            return proposals.new_zeros(0, 5)

        dets, _ = batched_nms(proposals, scores, ids, cfg.nms)
        return dets[:cfg.max_per_img]

    def onnx_export(self, x, img_metas):
        """Test without augmentation.

        Args:
            x (tuple[Tensor]): Features from the upstream network, each is
                a 4D-tensor.
            img_metas (list[dict]): Meta info of each image.

        Returns:
            Tensor: dets of shape [N, num_det, 5].
        """
        cls_scores, bbox_preds = self(x)

        assert len(cls_scores) == len(bbox_preds)

        batch_bboxes, batch_scores = super(RPNHead, self).onnx_export(
            cls_scores, bbox_preds, img_metas=img_metas, with_nms=False)
        # Use ONNX::NonMaxSuppression in deployment
        from mmdet.core.export import add_dummy_nms_for_onnx
        cfg = copy.deepcopy(self.test_cfg)
        score_threshold = cfg.nms.get('score_thr', 0.0)
        nms_pre = cfg.get('deploy_nms_pre', -1)
        # Different from the normal forward doing NMS level by level,
        # we do NMS across all levels when exporting ONNX.
        dets, _ = add_dummy_nms_for_onnx(batch_bboxes, batch_scores,
                                         cfg.max_per_img,
                                         cfg.nms.iou_threshold,
                                         score_threshold, nms_pre,
                                         cfg.max_per_img)
        return dets