File size: 5,974 Bytes
c9019cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import ConvModule
from mmcv.runner import auto_fp16
from ..builder import NECKS
from .fpn import FPN
@NECKS.register_module()
class PAFPN(FPN):
"""Path Aggregation Network for Instance Segmentation.
This is an implementation of the `PAFPN in Path Aggregation Network
<https://arxiv.org/abs/1803.01534>`_.
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale)
num_outs (int): Number of output scales.
start_level (int): Index of the start input backbone level used to
build the feature pyramid. Default: 0.
end_level (int): Index of the end input backbone level (exclusive) to
build the feature pyramid. Default: -1, which means the last level.
add_extra_convs (bool): Whether to add conv layers on top of the
original feature maps. Default: False.
extra_convs_on_inputs (bool): Whether to apply extra conv on
the original feature from the backbone. Default: False.
relu_before_extra_convs (bool): Whether to apply relu before the extra
conv. Default: False.
no_norm_on_lateral (bool): Whether to apply norm on lateral.
Default: False.
conv_cfg (dict): Config dict for convolution layer. Default: None.
norm_cfg (dict): Config dict for normalization layer. Default: None.
act_cfg (str): Config dict for activation layer in ConvModule.
Default: None.
init_cfg (dict or list[dict], optional): Initialization config dict.
"""
def __init__(self,
in_channels,
out_channels,
num_outs,
start_level=0,
end_level=-1,
add_extra_convs=False,
extra_convs_on_inputs=True,
relu_before_extra_convs=False,
no_norm_on_lateral=False,
conv_cfg=None,
norm_cfg=None,
act_cfg=None,
init_cfg=dict(
type='Xavier', layer='Conv2d', distribution='uniform')):
super(PAFPN, self).__init__(
in_channels,
out_channels,
num_outs,
start_level,
end_level,
add_extra_convs,
extra_convs_on_inputs,
relu_before_extra_convs,
no_norm_on_lateral,
conv_cfg,
norm_cfg,
act_cfg,
init_cfg=init_cfg)
# add extra bottom up pathway
self.downsample_convs = nn.ModuleList()
self.pafpn_convs = nn.ModuleList()
for i in range(self.start_level + 1, self.backbone_end_level):
d_conv = ConvModule(
out_channels,
out_channels,
3,
stride=2,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
inplace=False)
pafpn_conv = ConvModule(
out_channels,
out_channels,
3,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
inplace=False)
self.downsample_convs.append(d_conv)
self.pafpn_convs.append(pafpn_conv)
@auto_fp16()
def forward(self, inputs):
"""Forward function."""
assert len(inputs) == len(self.in_channels)
# build laterals
laterals = [
lateral_conv(inputs[i + self.start_level])
for i, lateral_conv in enumerate(self.lateral_convs)
]
# build top-down path
used_backbone_levels = len(laterals)
for i in range(used_backbone_levels - 1, 0, -1):
prev_shape = laterals[i - 1].shape[2:]
laterals[i - 1] += F.interpolate(
laterals[i], size=prev_shape, mode='nearest')
# build outputs
# part 1: from original levels
inter_outs = [
self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
]
# part 2: add bottom-up path
for i in range(0, used_backbone_levels - 1):
inter_outs[i + 1] += self.downsample_convs[i](inter_outs[i])
outs = []
outs.append(inter_outs[0])
outs.extend([
self.pafpn_convs[i - 1](inter_outs[i])
for i in range(1, used_backbone_levels)
])
# part 3: add extra levels
if self.num_outs > len(outs):
# use max pool to get more levels on top of outputs
# (e.g., Faster R-CNN, Mask R-CNN)
if not self.add_extra_convs:
for i in range(self.num_outs - used_backbone_levels):
outs.append(F.max_pool2d(outs[-1], 1, stride=2))
# add conv layers on top of original feature maps (RetinaNet)
else:
if self.add_extra_convs == 'on_input':
orig = inputs[self.backbone_end_level - 1]
outs.append(self.fpn_convs[used_backbone_levels](orig))
elif self.add_extra_convs == 'on_lateral':
outs.append(self.fpn_convs[used_backbone_levels](
laterals[-1]))
elif self.add_extra_convs == 'on_output':
outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))
else:
raise NotImplementedError
for i in range(used_backbone_levels + 1, self.num_outs):
if self.relu_before_extra_convs:
outs.append(self.fpn_convs[i](F.relu(outs[-1])))
else:
outs.append(self.fpn_convs[i](outs[-1]))
return tuple(outs)
|