"""Some special purpose layers for SSD."""
|
|
|
import keras.backend as K |
|
from keras.engine.topology import InputSpec |
|
from keras.engine.topology import Layer |
|
import numpy as np |
|
import tensorflow as tf |
|
|
|
|
|
class Normalize(Layer):
    """Normalization layer as described in ParseNet paper.

    L2-normalizes the input along the channel axis and multiplies the
    result by a trainable per-channel scale (``gamma``).

    # Arguments
        scale: Default feature scale. Every element of ``gamma`` is
            initialised to this value.

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        Same as input

    # References
        http://cs.unc.edu/~wliu/papers/parsenet.pdf

    #TODO
        Add possibility to have one scale for all features.
    """

    def __init__(self, scale, **kwargs):
        # The channel axis depends on the image data layout of the backend.
        if K.image_dim_ordering() == 'tf':
            self.axis = 3
        else:
            self.axis = 1
        self.scale = scale
        super(Normalize, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable per-channel scale vector ``gamma``."""
        self.input_spec = [InputSpec(shape=input_shape)]
        shape = (input_shape[self.axis],)
        init_gamma = self.scale * np.ones(shape)
        self.gamma = K.variable(init_gamma, name='{}_gamma'.format(self.name))
        self.trainable_weights = [self.gamma]

    def call(self, x, mask=None):
        """Return ``x`` L2-normalized over channels and scaled by ``gamma``."""
        output = K.l2_normalize(x, self.axis)
        gamma = self.gamma
        if self.axis == 1:
            # gamma is 1-D of length `channels`; a plain multiply broadcasts
            # it along the LAST axis, which is only the channel axis for the
            # 'tf' layout. For 'th' it must be aligned with axis 1 explicitly.
            gamma = K.reshape(gamma, (1, -1, 1, 1))
        output *= gamma
        return output

    def get_config(self):
        """Return the layer config, including the required ``scale`` argument.

        Without this the layer cannot be re-instantiated from a serialized
        model, because ``scale`` has no default value.
        """
        config = {'scale': self.scale}
        base_config = super(Normalize, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
|
|
|
|
class PriorBox(Layer):
    """Generate the prior boxes of designated sizes and aspect ratios.

    The values of the input tensor are not used; only its static spatial
    shape (to lay out the grid of box centers) and its dynamic batch size
    (to tile the priors per sample) are read.

    # Arguments
        img_size: Size of the input image as tuple (w, h).
        min_size: Minimum box size in pixels.
        max_size: Maximum box size in pixels.
        aspect_ratios: List of aspect ratios of boxes.
        flip: Whether to consider reverse aspect ratios.
        variances: List of variances for x, y, w, h.
            Either one value (used for all four) or four values.
        clip: Whether to clip the prior's coordinates
            such that they are within [0, 1].

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        3D tensor with shape:
        (samples, num_boxes, 8)
        The last axis holds (xmin, ymin, xmax, ymax), normalized by the
        image size, followed by the four variances.

    # References
        https://arxiv.org/abs/1512.02325

    #TODO
        Add possibility not to have variances.
        Add Theano support
    """

    # NOTE: the list default for `variances` is kept for interface
    # compatibility; it is only read (copied by np.array), never mutated.
    def __init__(self, img_size, min_size, max_size=None, aspect_ratios=None,
                 flip=True, variances=[0.1], clip=True, **kwargs):
        # Width/height axes depend on the image data layout of the backend.
        if K.image_dim_ordering() == 'tf':
            self.waxis = 2
            self.haxis = 1
        else:
            self.waxis = 3
            self.haxis = 2
        self.img_size = img_size
        if min_size <= 0:
            raise Exception('min_size must be positive.')
        self.min_size = min_size
        self.max_size = max_size

        # Keep the raw constructor arguments so get_config() can round-trip.
        self.flip = flip
        self._init_aspect_ratios = (list(aspect_ratios)
                                    if aspect_ratios else None)

        # The first 1.0 is the `min_size` square box; a second 1.0 (only
        # present when max_size is given) is the sqrt(min * max) square box.
        self.aspect_ratios = [1.0]
        if max_size:
            if max_size < min_size:
                raise Exception('max_size must be greater than min_size.')
            self.aspect_ratios.append(1.0)
        if aspect_ratios:
            for ar in aspect_ratios:
                if ar in self.aspect_ratios:
                    continue
                self.aspect_ratios.append(ar)
                if flip:
                    self.aspect_ratios.append(1.0 / ar)
        self.variances = np.array(variances)
        # Honour the caller's choice (previously hard-coded to True).
        self.clip = clip
        super(PriorBox, self).__init__(**kwargs)

    def compute_output_shape(self, input_shape):
        """Return `(samples, num_boxes, 8)` for the given input shape."""
        num_priors_ = len(self.aspect_ratios)
        layer_width = input_shape[self.waxis]
        layer_height = input_shape[self.haxis]
        num_boxes = num_priors_ * layer_width * layer_height
        return (input_shape[0], num_boxes, 8)

    def call(self, x, mask=None):
        """Build the prior-box tensor for the feature map `x`.

        The boxes are computed with NumPy from the static shape of `x` and
        wrapped in a backend variable; on TensorFlow the result is tiled
        along the batch axis to match the dynamic batch size of `x`.

        # Raises
            Exception: if the static input shape cannot be determined or
                if `variances` has neither one nor four entries.
        """
        if hasattr(x, '_keras_shape'):
            input_shape = x._keras_shape
        elif hasattr(K, 'int_shape'):
            input_shape = K.int_shape(x)
        else:
            raise Exception('Unable to determine the static input shape.')
        layer_width = input_shape[self.waxis]
        layer_height = input_shape[self.haxis]
        img_width = self.img_size[0]
        img_height = self.img_size[1]

        # Full widths/heights of the boxes, one entry per aspect ratio.
        box_widths = []
        box_heights = []
        for ar in self.aspect_ratios:
            if ar == 1 and len(box_widths) == 0:
                box_widths.append(self.min_size)
                box_heights.append(self.min_size)
            elif ar == 1 and len(box_widths) > 0:
                box_widths.append(np.sqrt(self.min_size * self.max_size))
                box_heights.append(np.sqrt(self.min_size * self.max_size))
            elif ar != 1:
                box_widths.append(self.min_size * np.sqrt(ar))
                box_heights.append(self.min_size / np.sqrt(ar))
        # Half extents, added to / subtracted from the centers below.
        box_widths = 0.5 * np.array(box_widths)
        box_heights = 0.5 * np.array(box_heights)

        # Box centers: one per feature-map cell, in image pixel coordinates.
        # float() guards against integer truncation under Python 2.
        step_x = float(img_width) / layer_width
        step_y = float(img_height) / layer_height
        linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x,
                           layer_width)
        liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y,
                           layer_height)
        centers_x, centers_y = np.meshgrid(linx, liny)
        centers_x = centers_x.reshape(-1, 1)
        centers_y = centers_y.reshape(-1, 1)

        # Each row holds, per prior, the 4 values (cx, cy, cx, cy), which
        # are turned into (xmin, ymin, xmax, ymax) and normalized.
        num_priors_ = len(self.aspect_ratios)
        prior_boxes = np.concatenate((centers_x, centers_y), axis=1)
        prior_boxes = np.tile(prior_boxes, (1, 2 * num_priors_))
        prior_boxes[:, ::4] -= box_widths
        prior_boxes[:, 1::4] -= box_heights
        prior_boxes[:, 2::4] += box_widths
        prior_boxes[:, 3::4] += box_heights
        prior_boxes[:, ::2] /= img_width
        prior_boxes[:, 1::2] /= img_height
        prior_boxes = prior_boxes.reshape(-1, 4)
        if self.clip:
            prior_boxes = np.clip(prior_boxes, 0.0, 1.0)

        # Append the variances so every box row has 8 values.
        num_boxes = len(prior_boxes)
        if len(self.variances) == 1:
            variances = np.ones((num_boxes, 4)) * self.variances[0]
        elif len(self.variances) == 4:
            variances = np.tile(self.variances, (num_boxes, 1))
        else:
            raise Exception('Must provide one or four variances.')
        prior_boxes = np.concatenate((prior_boxes, variances), axis=1)
        prior_boxes_tensor = K.expand_dims(K.variable(prior_boxes), 0)
        if K.backend() == 'tensorflow':
            # Repeat the priors once per sample in the (dynamic) batch.
            pattern = [tf.shape(x)[0], 1, 1]
            prior_boxes_tensor = tf.tile(prior_boxes_tensor, pattern)
        elif K.backend() == 'theano':
            # TODO: Theano support; the tensor is returned with batch size 1.
            pass
        return prior_boxes_tensor

    def get_config(self):
        """Return the layer config with all constructor arguments."""
        config = {
            'img_size': self.img_size,
            'min_size': self.min_size,
            'max_size': self.max_size,
            'aspect_ratios': self._init_aspect_ratios,
            'flip': self.flip,
            'variances': self.variances.tolist(),
            'clip': self.clip,
        }
        base_config = super(PriorBox, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
|