Unverified commit 31ae57eb authored by pkulzc, committed by GitHub

Minor fixes for object detection (#5613)

* Internal change.

PiperOrigin-RevId: 213914693

* Add an original_image_spatial_shape tensor to the input dictionary to store the shape of the original input image

PiperOrigin-RevId: 214018767

* Remove "groundtruth_confidences" from decoders use "groundtruth_weights" to indicate label confidence.

This also solves a bug that only surfaced now - random crop routines in core/preprocessor.py did not correctly handle "groundtruth_weight" tensors returned by the decoders.

PiperOrigin-RevId: 214091843

* Update CocoMaskEvaluator to allow for a batch of image info, rather than a single image.

PiperOrigin-RevId: 214295305

* Adding the option to summarize gradients.

PiperOrigin-RevId: 214310875

* Adds FasterRCNN inference on CPU

1. Adds a flag use_static_shapes_for_eval to restrict execution to ops that guarantee static shapes.
2. Skips filtering of overlapping anchors while clipping the anchors when use_static_shapes_for_eval is set to True.
3. A...
parent 0b0c9cfd
@@ -872,7 +872,8 @@ def merge_boxes_with_multiple_labels(boxes,
merged_box_indices)
def nearest_neighbor_upsampling(input_tensor, scale):
def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None,
width_scale=None):
"""Nearest neighbor upsampling implementation.
Nearest neighbor upsampling function that maps input tensor with shape
@@ -883,19 +884,33 @@ def nearest_neighbor_upsampling(input_tensor, scale):
Args:
input_tensor: A float32 tensor of size [batch, height_in, width_in,
channels].
scale: An integer multiple to scale resolution of input data.
scale: An integer multiple to scale resolution of input data in both height
and width dimensions.
height_scale: An integer multiple to scale the height of the input image.
When provided, this option overrides the `scale` option.
width_scale: An integer multiple to scale the width of the input image.
When provided, this option overrides the `scale` option.
Returns:
data_up: A float32 tensor of size
[batch, height_in*scale, width_in*scale, channels].
Raises:
ValueError: If `scale` is None and either `height_scale` or `width_scale`
is also None.
"""
if not scale and (height_scale is None or width_scale is None):
raise ValueError('Provide either `scale` or `height_scale` and'
' `width_scale`.')
with tf.name_scope('nearest_neighbor_upsampling'):
h_scale = scale if height_scale is None else height_scale
w_scale = scale if width_scale is None else width_scale
(batch_size, height, width,
channels) = shape_utils.combined_static_and_dynamic_shape(input_tensor)
output_tensor = tf.reshape(
input_tensor, [batch_size, height, 1, width, 1, channels]) * tf.ones(
[1, 1, scale, 1, scale, 1], dtype=input_tensor.dtype)
[1, 1, h_scale, 1, w_scale, 1], dtype=input_tensor.dtype)
return tf.reshape(output_tensor,
[batch_size, height * scale, width * scale, channels])
[batch_size, height * h_scale, width * w_scale, channels])
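As a side note, the reshape-and-multiply trick above is easy to sanity-check outside TensorFlow. A minimal NumPy sketch of the same idea (hypothetical helper, not part of this diff):

import numpy as np

def nn_upsample_np(x, h_scale, w_scale):
  # x has shape [batch, height, width, channels]; each pixel is repeated
  # h_scale times along height and w_scale times along width.
  b, h, w, c = x.shape
  up = x.reshape(b, h, 1, w, 1, c) * np.ones(
      (1, 1, h_scale, 1, w_scale, 1), dtype=x.dtype)
  return up.reshape(b, h * h_scale, w * w_scale, c)

x = np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1)
print(nn_upsample_np(x, 2, 3)[0, :, :, 0])
# [[0. 0. 0. 1. 1. 1.]
#  [0. 0. 0. 1. 1. 1.]
#  [2. 2. 2. 3. 3. 3.]
#  [2. 2. 2. 3. 3. 3.]]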
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
@@ -1072,29 +1087,35 @@ def native_crop_and_resize(image, boxes, crop_size, scope=None):
return tf.reshape(cropped_regions, final_shape)
def expected_classification_loss_under_sampling(batch_cls_targets, cls_losses,
desired_negative_sampling_ratio,
minimum_negative_sampling):
def expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
desired_negative_sampling_ratio, min_num_negative_samples):
"""Computes classification loss by background/foreground weighting.
The weighting is such that the effective background/foreground weight ratio
is the desired_negative_sampling_ratio. If p_i is the foreground probability
of anchor a_i, L(a_i) is the anchor's loss, N is the number of anchors, and M
is the sum of foreground probabilities across anchors, then the total loss L
is calculated as:
of anchor a_i, L(a_i) is the anchor's loss, N is the number of anchors, M
is the sum of foreground probabilities across anchors, and K is the desired
ratio between the number of negative and positive samples, then the total loss
L is calculated as:
beta = K*M/(N-M)
L = sum_{i=1}^N [p_i + beta * (1 - p_i)] * (L(a_i))
L = sum_{i=1}^N [p_i * L_p(a_i) + beta * (1 - p_i) * L_n(a_i)]
where L_p(a_i) is the loss against target assuming the anchor was matched,
otherwise zero, and L_n(a_i) is the loss against the background target
assuming the anchor was unmatched, otherwise zero.
Args:
batch_cls_targets: A tensor with shape [batch_size, num_anchors,
num_classes + 1], where 0'th index is the background class, containing
the class distribution for the target assigned to a given anchor.
cls_losses: Float tensor of shape [batch_size, num_anchors]
representing anchorwise classification losses.
batch_cls_targets: A tensor with shape [batch_size, num_anchors, num_classes
+ 1], where 0'th index is the background class, containing the class
distribution for the target assigned to a given anchor.
cls_losses: Float tensor of shape [batch_size, num_anchors] representing
anchorwise classification losses.
unmatched_cls_losses: Loss for each anchor against the unmatched class
target.
desired_negative_sampling_ratio: The desired background/foreground weight
ratio.
minimum_negative_sampling: Minimum number of effective negative samples.
min_num_negative_samples: Minimum number of effective negative samples.
Used only when there are no positive examples.
Returns:
@@ -1103,36 +1124,44 @@ def expected_classification_loss_under_sampling(batch_cls_targets, cls_losses,
num_anchors = tf.cast(tf.shape(batch_cls_targets)[1], tf.float32)
# find the p_i
foreground_probabilities = (
foreground_probabilities_from_targets(batch_cls_targets))
foreground_probabilities = 1 - batch_cls_targets[:, :, 0]
foreground_sum = tf.reduce_sum(foreground_probabilities, axis=-1)
# for each anchor, expected_j is the expected number of positive anchors
# given that this anchor was sampled as negative.
tiled_foreground_sum = tf.tile(
tf.reshape(foreground_sum, [-1, 1]),
[1, tf.cast(num_anchors, tf.int32)])
expected_j = tiled_foreground_sum - foreground_probabilities
k = desired_negative_sampling_ratio
# compute beta
denominators = (num_anchors - foreground_sum)
beta = tf.where(
tf.equal(denominators, 0), tf.zeros_like(foreground_sum),
k * foreground_sum / denominators)
expected_negatives = tf.to_float(num_anchors) - expected_j
desired_negatives = k * expected_j
desired_negatives = tf.where(
tf.greater(desired_negatives, expected_negatives), expected_negatives,
desired_negatives)
# probability that an anchor is sampled for the loss computation given that it
# is negative.
beta = desired_negatives / expected_negatives
# where the foreground sum is zero, use a minimum negative weight.
min_negative_weight = 1.0 * minimum_negative_sampling / num_anchors
min_negative_weight = 1.0 * min_num_negative_samples / num_anchors
beta = tf.where(
tf.equal(foreground_sum, 0), min_negative_weight * tf.ones_like(beta),
beta)
beta = tf.reshape(beta, [-1, 1])
tf.equal(tiled_foreground_sum, 0),
min_negative_weight * tf.ones_like(beta), beta)
cls_loss_weights = foreground_probabilities + (
1 - foreground_probabilities) * beta
foreground_weights = foreground_probabilities
background_weights = (1 - foreground_weights) * beta
weighted_losses = cls_loss_weights * cls_losses
weighted_foreground_losses = foreground_weights * cls_losses
weighted_background_losses = background_weights * unmatched_cls_losses
cls_losses = tf.reduce_sum(weighted_losses, axis=-1)
cls_losses = tf.reduce_sum(
weighted_foreground_losses, axis=-1) + tf.reduce_sum(
weighted_background_losses, axis=-1)
return cls_losses
def foreground_probabilities_from_targets(batch_cls_targets):
foreground_probabilities = 1 - batch_cls_targets[:, :, 0]
return foreground_probabilities
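To make the sampling arithmetic above concrete, here is a minimal NumPy rendering of the same computation (an illustrative sketch, not the library API); with the hard-label inputs from the first test below it reproduces [12., 34.]:

import numpy as np

def expected_loss_np(targets, cls_losses, unmatched_cls_losses, k, min_neg):
  # targets: [batch, num_anchors, num_classes + 1]; slot 0 is background.
  fg = 1.0 - targets[:, :, 0]                      # p_i per anchor
  n = targets.shape[1]
  fg_sum = fg.sum(axis=-1, keepdims=True)          # M, broadcast over anchors
  expected_j = fg_sum - fg                         # expected positives, given anchor i is negative
  expected_neg = n - expected_j
  desired_neg = np.minimum(k * expected_j, expected_neg)
  beta = desired_neg / expected_neg
  beta = np.where(fg_sum == 0, min_neg / n, beta)  # fallback when no positives
  return (fg * cls_losses + (1 - fg) * beta * unmatched_cls_losses).sum(axis=-1)

targets = np.array([[[1., 0, 0], [0, 1., 0]],
                    [[1., 0, 0], [0, 1., 0]]], dtype=np.float32)
print(expected_loss_np(targets,
                       np.array([[1., 2.], [3., 4.]]),
                       np.array([[10., 20.], [30., 40.]]),
                       k=2.0, min_neg=1.0))        # -> [12. 34.]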
@@ -1222,7 +1222,7 @@ class MergeBoxesWithMultipleLabelsTest(tf.test.TestCase):
class NearestNeighborUpsamplingTest(test_case.TestCase):
def test_upsampling(self):
def test_upsampling_with_single_scale(self):
def graph_fn(inputs):
custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2)
@@ -1236,6 +1236,22 @@ class NearestNeighborUpsamplingTest(test_case.TestCase):
[[2], [2], [3], [3]]]]
self.assertAllClose(custom_op_output, expected_output)
def test_upsampling_with_separate_height_width_scales(self):
def graph_fn(inputs):
custom_op_output = ops.nearest_neighbor_upsampling(inputs,
height_scale=2,
width_scale=3)
return custom_op_output
inputs = np.reshape(np.arange(4).astype(np.float32), [1, 2, 2, 1])
custom_op_output = self.execute(graph_fn, [inputs])
expected_output = [[[[0], [0], [0], [1], [1], [1]],
[[0], [0], [0], [1], [1], [1]],
[[2], [2], [2], [3], [3], [3]],
[[2], [2], [2], [3], [3], [3]]]]
self.assertAllClose(custom_op_output, expected_output)
class MatmulGatherOnZerothAxis(test_case.TestCase):
@@ -1454,78 +1470,182 @@ class OpsTestExpectedClassificationLoss(test_case.TestCase):
def testExpectedClassificationLossUnderSamplingWithHardLabels(self):
def graph_fn(batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling)
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[1., 0, 0], [0, 1., 0]], [[1., 0, 0], [0, 1., 0]]], dtype=np.float32)
cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
minimum_negative_sampling = np.array([1], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn, [
batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples
])
# expected_foreground_sum = [1,1]
# expected_beta = [2,2]
# expected_cls_loss_weights = [2,1],[2,1]
# expected_classification_loss_under_sampling = [2*1+1*2, 2*3+1*4]
expected_classification_loss_under_sampling = [2 + 2, 6 + 4]
# expected_foreground_sum = [1,1]
# expected_expected_j = [[1, 0], [1, 0]]
# expected_expected_negatives = [[1, 2], [1, 2]]
# expected_desired_negatives = [[2, 0], [2, 0]]
# expected_beta = [[1, 0], [1, 0]]
# expected_foreground_weights = [[0, 1], [0, 1]]
# expected_background_weights = [[1, 0], [1, 0]]
# expected_weighted_foreground_losses = [[0, 2], [0, 4]]
# expected_weighted_background_losses = [[10, 0], [30, 0]]
# expected_classification_loss_under_sampling = [2 + 10, 4 + 30] = [12, 34]
expected_classification_loss_under_sampling = [2 + 10, 4 + 30]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithHardLabelsMoreNegatives(
self):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[1., 0, 0], [0, 1., 0], [1., 0, 0], [1., 0, 0], [1., 0, 0]]],
dtype=np.float32)
cls_losses = np.array([[1, 2, 3, 4, 5]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn, [
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples
])
# expected_foreground_sum = [1]
# expected_expected_j = [[1, 0, 1, 1, 1]]
# expected_expected_negatives = [[4, 5, 4, 4, 4]]
# expected_desired_negatives = [[2, 0, 2, 2, 2]]
# expected_beta = [[.5, 0, .5, .5, .5]]
# expected_foreground_weights = [[0, 1, 0, 0, 0]]
# expected_background_weights = [[.5, 0, .5, .5, .5]]
# expected_weighted_foreground_losses = [[0, 2, 0, 0, 0]]
# expected_weighted_background_losses = [[10*.5, 0, 30*.5, 40*.5, 50*.5]]
# expected_classification_loss_under_sampling = [5+2+15+20+25]
expected_classification_loss_under_sampling = [5 + 2 + 15 + 20 + 25]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithAllNegative(self):
def graph_fn(batch_cls_targets, cls_losses):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling)
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[1, 0, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 0]]], dtype=np.float32)
cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
minimum_negative_sampling = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn,
[batch_cls_targets, cls_losses])
# expected_foreground_sum = [0,0]
# expected_beta = [0.5,0.5]
# expected_cls_loss_weights = [0.5,0.5],[0.5,0.5]
# expected_classification_loss_under_sampling = [.5*1+.5*2, .5*3+.5*4]
expected_classification_loss_under_sampling = [1.5, 3.5]
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(
graph_fn, [batch_cls_targets, cls_losses, unmatched_cls_losses])
# expected_foreground_sum = [0,0]
# expected_expected_j = [[0, 0], [0, 0]]
# expected_expected_negatives = [[2, 2], [2, 2]]
# expected_desired_negatives = [[0, 0], [0, 0]]
# expected_beta = [[0, 0], [0, 0]], then replaced by the
# min_num_negative_samples fallback weight of .5 since foreground_sum is 0
# expected_foreground_weights = [[0, 0], [0, 0]]
# expected_background_weights = [[.5, .5], [.5, .5]]
# expected_weighted_foreground_losses = [[0, 0], [0, 0]]
# expected_weighted_background_losses = [[5, 10], [15, 20]]
# expected_classification_loss_under_sampling = [15, 35]
expected_classification_loss_under_sampling = [
10 * .5 + 20 * .5, 30 * .5 + 40 * .5
]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithAllPositive(self):
def graph_fn(batch_cls_targets, cls_losses):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling)
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[0, 1., 0], [0, 1., 0]], [[0, 1, 0], [0, 0, 1]]], dtype=np.float32)
cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
minimum_negative_sampling = np.array([1], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(
graph_fn, [batch_cls_targets, cls_losses, unmatched_cls_losses])
# expected_foreground_sum = [2,2]
# expected_expected_j = [[1, 1], [1, 1]]
# expected_expected_negatives = [[1, 1], [1, 1]]
# expected_desired_negatives = [[1, 1], [1, 1]]
# expected_beta = [[1, 1],[1, 1]]
# expected_foreground_weights = [[1, 1], [1, 1]]
# expected_background_weights = [[0, 0], [0, 0]]
# expected_weighted_foreground_losses = [[1, 2], [3, 4]]
# expected_weighted_background_losses = [[0, 0], [0, 0]]
# expected_classification_loss_under_sampling = [1 + 2, 3 + 4] = [3, 7]
expected_classification_loss_under_sampling = [1 + 2, 3 + 4]
classification_loss = self.execute(graph_fn,
[batch_cls_targets, cls_losses])
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
# expected_foreground_sum = [2,2]
# expected_beta = [0,0]
# expected_cls_loss_weights = [1,1],[1,1]
# expected_classification_loss_under_sampling = [1*1+1*2, 1*3+1*4]
expected_classification_loss_under_sampling = [1 + 2, 3 + 4]
def testExpectedClassificationLossUnderSamplingWithSoftLabels(self):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array([[[.75, .25, 0], [0.25, .75, 0], [.75, .25, 0],
[0.25, .75, 0], [1., 0, 0]]],
dtype=np.float32)
cls_losses = np.array([[1, 2, 3, 4, 5]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn, [
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples
])
# expected_foreground_sum = [2]
# expected_expected_j = [[1.75, 1.25, 1.75, 1.25, 2]]
# expected_expected_negatives = [[3.25, 3.75, 3.25, 3.75, 3]]
# expected_desired_negatives = [[3.25, 2.5, 3.25, 2.5, 3]]
# expected_beta = [[1, 2/3, 1, 2/3, 1]]
# expected_foreground_weights = [[0.25, .75, .25, .75, 0]]
# expected_background_weights = [[.75, 1/6., .75, 1/6., 1]]
# expected_weighted_foreground_losses = [[.25*1, .75*2, .25*3, .75*4, 0*5]]
# expected_weighted_background_losses = [[
# .75*10, 1/6.*20, .75*30, 1/6.*40, 1*50]]
# expected_classification_loss_under_sampling = sum([
# .25*1, .75*2, .25*3, .75*4, 0, .75*10, 1/6.*20, .75*30,
# 1/6.*40, 1*50])
expected_classification_loss_under_sampling = [
sum([
.25 * 1, .75 * 2, .25 * 3, .75 * 4, 0, .75 * 10, 1 / 6. * 20,
.75 * 30, 1 / 6. * 40, 1 * 50
])
]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
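For the record, the soft-label expectation above works out numerically as follows:
# foreground terms: .25*1 + .75*2 + .25*3 + .75*4 + 0*5 = 5.5
# background terms: .75*10 + 1/6*20 + .75*30 + 1/6*40 + 1*50 = 90.0
# total expected loss = 95.5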
@@ -45,8 +45,10 @@ class MockBoxCoder(box_coder.BoxCoder):
class MockBoxPredictor(box_predictor.BoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes, predict_mask=False):
def __init__(self, is_training, num_classes, add_background_class=True,
predict_mask=False):
super(MockBoxPredictor, self).__init__(is_training, num_classes)
self._add_background_class = add_background_class
self._predict_mask = predict_mask
def _predict(self, image_features, num_predictions_per_location):
@@ -57,10 +59,13 @@ class MockBoxPredictor(box_predictor.BoxPredictor):
num_anchors = (combined_feature_shape[1] * combined_feature_shape[2])
code_size = 4
zero = tf.reduce_sum(0 * image_feature)
num_class_slots = self.num_classes
if self._add_background_class:
num_class_slots = num_class_slots + 1
box_encodings = zero + tf.zeros(
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
(batch_size, num_anchors, num_class_slots), dtype=tf.float32)
masks = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes, DEFAULT_MASK_SIZE,
DEFAULT_MASK_SIZE),
@@ -80,9 +85,11 @@ class MockBoxPredictor(box_predictor.BoxPredictor):
class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes, predict_mask=False):
def __init__(self, is_training, num_classes, add_background_class=True,
predict_mask=False):
super(MockKerasBoxPredictor, self).__init__(
is_training, num_classes, False, False)
self._add_background_class = add_background_class
self._predict_mask = predict_mask
def _predict(self, image_features, **kwargs):
@@ -93,10 +100,13 @@ class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor):
num_anchors = (combined_feature_shape[1] * combined_feature_shape[2])
code_size = 4
zero = tf.reduce_sum(0 * image_feature)
num_class_slots = self.num_classes
if self._add_background_class:
num_class_slots = num_class_slots + 1
box_encodings = zero + tf.zeros(
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
(batch_size, num_anchors, num_class_slots), dtype=tf.float32)
masks = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes, DEFAULT_MASK_SIZE,
DEFAULT_MASK_SIZE),
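A quick sketch of what the new add_background_class switch changes in these mocks (illustrative usage only, assuming the surrounding test-utils definitions are in scope):

# With num_classes=3, the default mock emits class predictions with
# num_classes + 1 = 4 slots (background plus classes); disabling the
# background class drops that to 3 slots.
mock_predictor = MockBoxPredictor(
    is_training=False, num_classes=3, add_background_class=False)
# class_predictions_with_background returned by _predict now has shape
# [batch_size, num_anchors, 3] rather than [batch_size, num_anchors, 4].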