File size: 5,751 Bytes
c9019cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
"""SSD training utils."""
import tensorflow as tf
class MultiboxLoss(object):
"""Multibox loss with some helper functions.
# Arguments
num_classes: Number of classes including background.
alpha: Weight of L1-smooth loss.
neg_pos_ratio: Max ratio of negative to positive boxes in loss.
background_label_id: Id of background label.
negatives_for_hard: Number of negative boxes to consider
it there is no positive boxes in batch.
# References
https://arxiv.org/abs/1512.02325
# TODO
Add possibility for background label id be not zero
"""
def __init__(self, num_classes, alpha=1.0, neg_pos_ratio=3.0,
background_label_id=0, negatives_for_hard=100.0):
self.num_classes = num_classes
self.alpha = alpha
self.neg_pos_ratio = neg_pos_ratio
if background_label_id != 0:
raise Exception('Only 0 as background label id is supported')
self.background_label_id = background_label_id
self.negatives_for_hard = negatives_for_hard
def _l1_smooth_loss(self, y_true, y_pred):
"""Compute L1-smooth loss.
# Arguments
y_true: Ground truth bounding boxes,
tensor of shape (?, num_boxes, 4).
y_pred: Predicted bounding boxes,
tensor of shape (?, num_boxes, 4).
# Returns
l1_loss: L1-smooth loss, tensor of shape (?, num_boxes).
# References
https://arxiv.org/abs/1504.08083
"""
abs_loss = tf.abs(y_true - y_pred)
sq_loss = 0.5 * (y_true - y_pred)**2
l1_loss = tf.where(tf.less(abs_loss, 1.0), sq_loss, abs_loss - 0.5)
return tf.reduce_sum(l1_loss, -1)
def _softmax_loss(self, y_true, y_pred):
"""Compute softmax loss.
# Arguments
y_true: Ground truth targets,
tensor of shape (?, num_boxes, num_classes).
y_pred: Predicted logits,
tensor of shape (?, num_boxes, num_classes).
# Returns
softmax_loss: Softmax loss, tensor of shape (?, num_boxes).
"""
y_pred = tf.maximum(tf.minimum(y_pred, 1 - 1e-15), 1e-15)
softmax_loss = -tf.reduce_sum(y_true * tf.log(y_pred),
axis=-1)
return softmax_loss
def compute_loss(self, y_true, y_pred):
"""Compute mutlibox loss.
# Arguments
y_true: Ground truth targets,
tensor of shape (?, num_boxes, 4 + num_classes + 8),
priors in ground truth are fictitious,
y_true[:, :, -8] has 1 if prior should be penalized
or in other words is assigned to some ground truth box,
y_true[:, :, -7:] are all 0.
y_pred: Predicted logits,
tensor of shape (?, num_boxes, 4 + num_classes + 8).
# Returns
loss: Loss for prediction, tensor of shape (?,).
"""
batch_size = tf.shape(y_true)[0]
num_boxes = tf.to_float(tf.shape(y_true)[1])
# loss for all priors
conf_loss = self._softmax_loss(y_true[:, :, 4:-8],
y_pred[:, :, 4:-8])
loc_loss = self._l1_smooth_loss(y_true[:, :, :4],
y_pred[:, :, :4])
# get positives loss
num_pos = tf.reduce_sum(y_true[:, :, -8], axis=-1)
pos_loc_loss = tf.reduce_sum(loc_loss * y_true[:, :, -8],
axis=1)
pos_conf_loss = tf.reduce_sum(conf_loss * y_true[:, :, -8],
axis=1)
# get negatives loss, we penalize only confidence here
num_neg = tf.minimum(self.neg_pos_ratio * num_pos,
num_boxes - num_pos)
pos_num_neg_mask = tf.greater(num_neg, 0)
has_min = tf.to_float(tf.reduce_any(pos_num_neg_mask))
num_neg = tf.concat(axis=0, values=[num_neg,
[(1 - has_min) * self.negatives_for_hard]])
num_neg_batch = tf.reduce_min(tf.boolean_mask(num_neg,
tf.greater(num_neg, 0)))
num_neg_batch = tf.to_int32(num_neg_batch)
confs_start = 4 + self.background_label_id + 1
confs_end = confs_start + self.num_classes - 1
max_confs = tf.reduce_max(y_pred[:, :, confs_start:confs_end],
axis=2)
_, indices = tf.nn.top_k(max_confs * (1 - y_true[:, :, -8]),
k=num_neg_batch)
batch_idx = tf.expand_dims(tf.range(0, batch_size), 1)
batch_idx = tf.tile(batch_idx, (1, num_neg_batch))
full_indices = (tf.reshape(batch_idx, [-1]) * tf.to_int32(num_boxes) +
tf.reshape(indices, [-1]))
# full_indices = tf.concat(2, [tf.expand_dims(batch_idx, 2),
# tf.expand_dims(indices, 2)])
# neg_conf_loss = tf.gather_nd(conf_loss, full_indices)
neg_conf_loss = tf.gather(tf.reshape(conf_loss, [-1]),
full_indices)
neg_conf_loss = tf.reshape(neg_conf_loss,
[batch_size, num_neg_batch])
neg_conf_loss = tf.reduce_sum(neg_conf_loss, axis=1)
# loss is sum of positives and negatives
total_loss = pos_conf_loss + neg_conf_loss
total_loss /= (num_pos + tf.to_float(num_neg_batch))
num_pos = tf.where(tf.not_equal(num_pos, 0), num_pos,
tf.ones_like(num_pos))
total_loss += (self.alpha * pos_loc_loss) / num_pos
return total_loss
|