Unverified commit 31ae57eb authored by pkulzc, committed by GitHub

Minor fixes for object detection (#5613)

* Internal change.

PiperOrigin-RevId: 213914693

* Add an original_image_spatial_shape tensor to the input dictionary to store the shape of the original input image

PiperOrigin-RevId: 214018767

* Remove "groundtruth_confidences" from decoders use "groundtruth_weights" to indicate label confidence.

This also solves a bug that only surfaced now - random crop routines in core/preprocessor.py did not correctly handle "groundtruth_weight" tensors returned by the decoders.

PiperOrigin-RevId: 214091843

* Update CocoMaskEvaluator to allow for a batch of image info, rather than a single image.

PiperOrigin-RevId: 214295305

* Adding the option to summarize gradients.

PiperOrigin-RevId: 214310875

* Adds FasterRCNN inference on CPU

1. Adds a flag use_static_shapes_for_eval to restrict execution to ops that guarantee static shapes.
2. Skips filtering of overlapping anchors while clipping the anchors when use_static_shapes_for_eval is set to True.
3. A...
parent 0b0c9cfd
@@ -872,7 +872,8 @@ def merge_boxes_with_multiple_labels(boxes,
merged_box_indices)
def nearest_neighbor_upsampling(input_tensor, scale):
def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None,
width_scale=None):
"""Nearest neighbor upsampling implementation.
Nearest neighbor upsampling function that maps input tensor with shape
@@ -883,19 +884,33 @@ def nearest_neighbor_upsampling(input_tensor, scale):
Args:
input_tensor: A float32 tensor of size [batch, height_in, width_in,
channels].
scale: An integer multiple to scale resolution of input data.
scale: An integer multiple to scale resolution of input data in both height
and width dimensions.
height_scale: An integer multiple to scale the height of the input image.
When provided, this option overrides the `scale` option.
width_scale: An integer multiple to scale the width of the input image.
When provided, this option overrides the `scale` option.
Returns:
data_up: A float32 tensor of size
[batch, height_in*scale, width_in*scale, channels].
Raises:
ValueError: If `scale` is None and either `height_scale` or `width_scale`
is also None.
"""
if not scale and (height_scale is None or width_scale is None):
raise ValueError('Provide either `scale` or `height_scale` and'
' `width_scale`.')
with tf.name_scope('nearest_neighbor_upsampling'):
h_scale = scale if height_scale is None else height_scale
w_scale = scale if width_scale is None else width_scale
(batch_size, height, width,
channels) = shape_utils.combined_static_and_dynamic_shape(input_tensor)
output_tensor = tf.reshape(
input_tensor, [batch_size, height, 1, width, 1, channels]) * tf.ones(
[1, 1, scale, 1, scale, 1], dtype=input_tensor.dtype)
[1, 1, h_scale, 1, w_scale, 1], dtype=input_tensor.dtype)
return tf.reshape(output_tensor,
[batch_size, height * scale, width * scale, channels])
[batch_size, height * h_scale, width * w_scale, channels])
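As a side note, the reshape-and-multiply trick above is easy to sanity-check outside TensorFlow. A minimal NumPy sketch of the same idea (hypothetical helper, not part of this diff):

import numpy as np

def nn_upsample_np(x, h_scale, w_scale):
  # x has shape [batch, height, width, channels]; each pixel is repeated
  # h_scale times along height and w_scale times along width.
  b, h, w, c = x.shape
  up = x.reshape(b, h, 1, w, 1, c) * np.ones(
      (1, 1, h_scale, 1, w_scale, 1), dtype=x.dtype)
  return up.reshape(b, h * h_scale, w * w_scale, c)

x = np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1)
print(nn_upsample_np(x, 2, 3)[0, :, :, 0])
# [[0. 0. 0. 1. 1. 1.]
#  [0. 0. 0. 1. 1. 1.]
#  [2. 2. 2. 3. 3. 3.]
#  [2. 2. 2. 3. 3. 3.]]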
def matmul_gather_on_zeroth_axis(params, indices, scope=None):
@@ -1072,29 +1087,35 @@ def native_crop_and_resize(image, boxes, crop_size, scope=None):
return tf.reshape(cropped_regions, final_shape)
def expected_classification_loss_under_sampling(batch_cls_targets, cls_losses,
desired_negative_sampling_ratio,
minimum_negative_sampling):
def expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
desired_negative_sampling_ratio, min_num_negative_samples):
"""Computes classification loss by background/foreground weighting.
The weighting is such that the effective background/foreground weight ratio
is the desired_negative_sampling_ratio. If p_i is the foreground probability
of anchor a_i, L(a_i) is the anchor's loss, N is the number of anchors, and M
is the sum of foreground probabilities across anchors, then the total loss L
is calculated as:
of anchor a_i, L(a_i) is the anchor's loss, N is the number of anchors, M
is the sum of foreground probabilities across anchors, and K is the desired
ratio between the number of negative and positive samples, then the total loss
L is calculated as:
beta = K*M/(N-M)
L = sum_{i=1}^N [p_i + beta * (1 - p_i)] * (L(a_i))
L = sum_{i=1}^N [p_i * L_p(a_i) + beta * (1 - p_i) * L_n(a_i)]
where L_p(a_i) is the loss against target assuming the anchor was matched,
otherwise zero, and L_n(a_i) is the loss against the background target
assuming the anchor was unmatched, otherwise zero.
Args:
batch_cls_targets: A tensor with shape [batch_size, num_anchors,
num_classes + 1], where 0'th index is the background class, containing
the class distribution for the target assigned to a given anchor.
cls_losses: Float tensor of shape [batch_size, num_anchors]
representing anchorwise classification losses.
batch_cls_targets: A tensor with shape [batch_size, num_anchors, num_classes
+ 1], where 0'th index is the background class, containing the class
distribution for the target assigned to a given anchor.
cls_losses: Float tensor of shape [batch_size, num_anchors] representing
anchorwise classification losses.
unmatched_cls_losses: Loss for each anchor against the unmatched class
target.
desired_negative_sampling_ratio: The desired background/foreground weight
ratio.
minimum_negative_sampling: Minimum number of effective negative samples.
min_num_negative_samples: Minimum number of effective negative samples.
Used only when there are no positive examples.
Returns:
@@ -1103,36 +1124,44 @@ def expected_classification_loss_under_sampling(batch_cls_targets, cls_losses,
num_anchors = tf.cast(tf.shape(batch_cls_targets)[1], tf.float32)
# find the p_i
foreground_probabilities = (
foreground_probabilities_from_targets(batch_cls_targets))
foreground_probabilities = 1 - batch_cls_targets[:, :, 0]
foreground_sum = tf.reduce_sum(foreground_probabilities, axis=-1)
# for each anchor, expected_j is the expected number of positive anchors
# given that this anchor was sampled as negative.
tiled_foreground_sum = tf.tile(
tf.reshape(foreground_sum, [-1, 1]),
[1, tf.cast(num_anchors, tf.int32)])
expected_j = tiled_foreground_sum - foreground_probabilities
k = desired_negative_sampling_ratio
# compute beta
denominators = (num_anchors - foreground_sum)
beta = tf.where(
tf.equal(denominators, 0), tf.zeros_like(foreground_sum),
k * foreground_sum / denominators)
expected_negatives = tf.to_float(num_anchors) - expected_j
desired_negatives = k * expected_j
desired_negatives = tf.where(
tf.greater(desired_negatives, expected_negatives), expected_negatives,
desired_negatives)
# probability that an anchor is sampled for the loss computation given that it
# is negative.
beta = desired_negatives / expected_negatives
# where the foreground sum is zero, use a minimum negative weight.
min_negative_weight = 1.0 * minimum_negative_sampling / num_anchors
min_negative_weight = 1.0 * min_num_negative_samples / num_anchors
beta = tf.where(
tf.equal(foreground_sum, 0), min_negative_weight * tf.ones_like(beta),
beta)
beta = tf.reshape(beta, [-1, 1])
tf.equal(tiled_foreground_sum, 0),
min_negative_weight * tf.ones_like(beta), beta)
cls_loss_weights = foreground_probabilities + (
1 - foreground_probabilities) * beta
foreground_weights = foreground_probabilities
background_weights = (1 - foreground_weights) * beta
weighted_losses = cls_loss_weights * cls_losses
weighted_foreground_losses = foreground_weights * cls_losses
weighted_background_losses = background_weights * unmatched_cls_losses
cls_losses = tf.reduce_sum(weighted_losses, axis=-1)
cls_losses = tf.reduce_sum(
weighted_foreground_losses, axis=-1) + tf.reduce_sum(
weighted_background_losses, axis=-1)
return cls_losses
def foreground_probabilities_from_targets(batch_cls_targets):
foreground_probabilities = 1 - batch_cls_targets[:, :, 0]
return foreground_probabilities
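To make the sampling arithmetic above concrete, here is a minimal NumPy rendering of the same computation (an illustrative sketch, not the library API); with the hard-label inputs from the first test below it reproduces [12., 34.]:

import numpy as np

def expected_loss_np(targets, cls_losses, unmatched_cls_losses, k, min_neg):
  # targets: [batch, num_anchors, num_classes + 1]; slot 0 is background.
  fg = 1.0 - targets[:, :, 0]                      # p_i per anchor
  n = targets.shape[1]
  fg_sum = fg.sum(axis=-1, keepdims=True)          # M, broadcast over anchors
  expected_j = fg_sum - fg                         # expected positives, given anchor i is negative
  expected_neg = n - expected_j
  desired_neg = np.minimum(k * expected_j, expected_neg)
  beta = desired_neg / expected_neg
  beta = np.where(fg_sum == 0, min_neg / n, beta)  # fallback when no positives
  return (fg * cls_losses + (1 - fg) * beta * unmatched_cls_losses).sum(axis=-1)

targets = np.array([[[1., 0, 0], [0, 1., 0]],
                    [[1., 0, 0], [0, 1., 0]]], dtype=np.float32)
print(expected_loss_np(targets,
                       np.array([[1., 2.], [3., 4.]]),
                       np.array([[10., 20.], [30., 40.]]),
                       k=2.0, min_neg=1.0))        # -> [12. 34.]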
@@ -1222,7 +1222,7 @@ class MergeBoxesWithMultipleLabelsTest(tf.test.TestCase):
class NearestNeighborUpsamplingTest(test_case.TestCase):
def test_upsampling(self):
def test_upsampling_with_single_scale(self):
def graph_fn(inputs):
custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2)
@@ -1236,6 +1236,22 @@ class NearestNeighborUpsamplingTest(test_case.TestCase):
[[2], [2], [3], [3]]]]
self.assertAllClose(custom_op_output, expected_output)
def test_upsampling_with_separate_height_width_scales(self):
def graph_fn(inputs):
custom_op_output = ops.nearest_neighbor_upsampling(inputs,
height_scale=2,
width_scale=3)
return custom_op_output
inputs = np.reshape(np.arange(4).astype(np.float32), [1, 2, 2, 1])
custom_op_output = self.execute(graph_fn, [inputs])
expected_output = [[[[0], [0], [0], [1], [1], [1]],
[[0], [0], [0], [1], [1], [1]],
[[2], [2], [2], [3], [3], [3]],
[[2], [2], [2], [3], [3], [3]]]]
self.assertAllClose(custom_op_output, expected_output)
class MatmulGatherOnZerothAxis(test_case.TestCase):
@@ -1454,78 +1470,182 @@ class OpsTestExpectedClassificationLoss(test_case.TestCase):
def testExpectedClassificationLossUnderSamplingWithHardLabels(self):
def graph_fn(batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling)
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[1., 0, 0], [0, 1., 0]], [[1., 0, 0], [0, 1., 0]]], dtype=np.float32)
cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
minimum_negative_sampling = np.array([1], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn, [
batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples
])
# expected_foreground_sum = [1,1]
# expected_beta = [2,2]
# expected_cls_loss_weights = [2,1],[2,1]
# expected_classification_loss_under_sampling = [2*1+1*2, 2*3+1*4]
expected_classification_loss_under_sampling = [2 + 2, 6 + 4]
# expected_foreground_sum = [1,1]
# expected_expected_j = [[1, 0], [1, 0]]
# expected_expected_negatives = [[1, 2], [1, 2]]
# expected_desired_negatives = [[2, 0], [2, 0]]
# expected_beta = [[1, 0], [1, 0]]
# expected_foreground_weights = [[0, 1], [0, 1]]
# expected_background_weights = [[1, 0], [1, 0]]
# expected_weighted_foreground_losses = [[0, 2], [0, 4]]
# expected_weighted_background_losses = [[10, 0], [30, 0]]
# expected_classification_loss_under_sampling = [2 + 10, 4 + 30] = [12, 34]
expected_classification_loss_under_sampling = [2 + 10, 4 + 30]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithHardLabelsMoreNegatives(
self):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[1., 0, 0], [0, 1., 0], [1., 0, 0], [1., 0, 0], [1., 0, 0]]],
dtype=np.float32)
cls_losses = np.array([[1, 2, 3, 4, 5]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn, [
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples
])
# expected_foreground_sum = [1]
# expected_expected_j = [[1, 0, 1, 1, 1]]
# expected_expected_negatives = [[4, 5, 4, 4, 4]]
# expected_desired_negatives = [[2, 0, 2, 2, 2]]
# expected_beta = [[.5, 0, .5, .5, .5]]
# expected_foreground_weights = [[0, 1, 0, 0, 0]]
# expected_background_weights = [[.5, 0, .5, .5, .5]]
# expected_weighted_foreground_losses = [[0, 2, 0, 0, 0]]
# expected_weighted_background_losses = [[10*.5, 0, 30*.5, 40*.5, 50*.5]]
# expected_classification_loss_under_sampling = [5+2+15+20+25]
expected_classification_loss_under_sampling = [5 + 2 + 15 + 20 + 25]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithAllNegative(self):
def graph_fn(batch_cls_targets, cls_losses):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling)
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[1, 0, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 0]]], dtype=np.float32)
cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
minimum_negative_sampling = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn,
[batch_cls_targets, cls_losses])
# expected_foreground_sum = [0,0]
# expected_beta = [0.5,0.5]
# expected_cls_loss_weights = [0.5,0.5],[0.5,0.5]
# expected_classification_loss_under_sampling = [.5*1+.5*2, .5*3+.5*4]
expected_classification_loss_under_sampling = [1.5, 3.5]
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(
graph_fn, [batch_cls_targets, cls_losses, unmatched_cls_losses])
# expected_foreground_sum = [0,0]
# expected_expected_j = [[0, 0], [0, 0]]
# expected_expected_negatives = [[2, 2], [2, 2]]
# expected_desired_negatives = [[0, 0], [0, 0]]
# expected_beta = [[0, 0], [0, 0]], then replaced by the
# min_num_negative_samples fallback weight of .5 since foreground_sum is 0
# expected_foreground_weights = [[0, 0], [0, 0]]
# expected_background_weights = [[.5, .5], [.5, .5]]
# expected_weighted_foreground_losses = [[0, 0], [0, 0]]
# expected_weighted_background_losses = [[5, 10], [15, 20]]
# expected_classification_loss_under_sampling = [15, 35]
expected_classification_loss_under_sampling = [
10 * .5 + 20 * .5, 30 * .5 + 40 * .5
]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
def testExpectedClassificationLossUnderSamplingWithAllPositive(self):
def graph_fn(batch_cls_targets, cls_losses):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, negative_to_positive_ratio,
minimum_negative_sampling)
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array(
[[[0, 1., 0], [0, 1., 0]], [[0, 1, 0], [0, 0, 1]]], dtype=np.float32)
cls_losses = np.array([[1, 2], [3, 4]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20], [30, 40]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
minimum_negative_sampling = np.array([1], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(
graph_fn, [batch_cls_targets, cls_losses, unmatched_cls_losses])
# expected_foreground_sum = [2,2]
# expected_expected_j = [[1, 1], [1, 1]]
# expected_expected_negatives = [[1, 1], [1, 1]]
# expected_desired_negatives = [[1, 1], [1, 1]]
# expected_beta = [[1, 1],[1, 1]]
# expected_foreground_weights = [[1, 1], [1, 1]]
# expected_background_weights = [[0, 0], [0, 0]]
# expected_weighted_foreground_losses = [[1, 2], [3, 4]]
# expected_weighted_background_losses = [[0, 0], [0, 0]]
# expected_classification_loss_under_sampling = [1 + 2, 3 + 4] = [3, 7]
expected_classification_loss_under_sampling = [1 + 2, 3 + 4]
classification_loss = self.execute(graph_fn,
[batch_cls_targets, cls_losses])
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
# expected_foreground_sum = [2,2]
# expected_beta = [0,0]
# expected_cls_loss_weights = [1,1],[1,1]
# expected_classification_loss_under_sampling = [1*1+1*2, 1*3+1*4]
expected_classification_loss_under_sampling = [1 + 2, 3 + 4]
def testExpectedClassificationLossUnderSamplingWithSoftLabels(self):
def graph_fn(batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples):
return ops.expected_classification_loss_under_sampling(
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples)
batch_cls_targets = np.array([[[.75, .25, 0], [0.25, .75, 0], [.75, .25, 0],
[0.25, .75, 0], [1., 0, 0]]],
dtype=np.float32)
cls_losses = np.array([[1, 2, 3, 4, 5]], dtype=np.float32)
unmatched_cls_losses = np.array([[10, 20, 30, 40, 50]], dtype=np.float32)
negative_to_positive_ratio = np.array([2], dtype=np.float32)
min_num_negative_samples = np.array([1], dtype=np.float32)
classification_loss = self.execute(graph_fn, [
batch_cls_targets, cls_losses, unmatched_cls_losses,
negative_to_positive_ratio, min_num_negative_samples
])
# expected_foreground_sum = [2]
# expected_expected_j = [[1.75, 1.25, 1.75, 1.25, 2]]
# expected_expected_negatives = [[3.25, 3.75, 3.25, 3.75, 3]]
# expected_desired_negatives = [[3.25, 2.5, 3.25, 2.5, 3]]
# expected_beta = [[1, 2/3, 1, 2/3, 1]]
# expected_foreground_weights = [[0.25, .75, .25, .75, 0]]
# expected_background_weights = [[.75, 1/6., .75, 1/6., 1]]
# expected_weighted_foreground_losses = [[.25*1, .75*2, .25*3, .75*4, 0*5]]
# expected_weighted_background_losses = [[
# .75*10, 1/6.*20, .75*30, 1/6.*40, 1*50]]
# expected_classification_loss_under_sampling = sum([
# .25*1, .75*2, .25*3, .75*4, 0, .75*10, 1/6.*20, .75*30,
# 1/6.*40, 1*50])
expected_classification_loss_under_sampling = [
sum([
.25 * 1, .75 * 2, .25 * 3, .75 * 4, 0, .75 * 10, 1 / 6. * 20,
.75 * 30, 1 / 6. * 40, 1 * 50
])
]
self.assertAllClose(expected_classification_loss_under_sampling,
classification_loss)
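For the record, the soft-label expectation above works out numerically as follows:
# foreground terms: .25*1 + .75*2 + .25*3 + .75*4 + 0*5 = 5.5
# background terms: .75*10 + 1/6*20 + .75*30 + 1/6*40 + 1*50 = 90.0
# total expected loss = 95.5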
@@ -45,8 +45,10 @@ class MockBoxCoder(box_coder.BoxCoder):
class MockBoxPredictor(box_predictor.BoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes, predict_mask=False):
def __init__(self, is_training, num_classes, add_background_class=True,
predict_mask=False):
super(MockBoxPredictor, self).__init__(is_training, num_classes)
self._add_background_class = add_background_class
self._predict_mask = predict_mask
def _predict(self, image_features, num_predictions_per_location):
@@ -57,10 +59,13 @@ class MockBoxPredictor(box_predictor.BoxPredictor):
num_anchors = (combined_feature_shape[1] * combined_feature_shape[2])
code_size = 4
zero = tf.reduce_sum(0 * image_feature)
num_class_slots = self.num_classes
if self._add_background_class:
num_class_slots = num_class_slots + 1
box_encodings = zero + tf.zeros(
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
(batch_size, num_anchors, num_class_slots), dtype=tf.float32)
masks = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes, DEFAULT_MASK_SIZE,
DEFAULT_MASK_SIZE),
@@ -80,9 +85,11 @@ class MockBoxPredictor(box_predictor.BoxPredictor):
class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor):
"""Simple box predictor that ignores inputs and outputs all zeros."""
def __init__(self, is_training, num_classes, predict_mask=False):
def __init__(self, is_training, num_classes, add_background_class=True,
predict_mask=False):
super(MockKerasBoxPredictor, self).__init__(
is_training, num_classes, False, False)
self._add_background_class = add_background_class
self._predict_mask = predict_mask
def _predict(self, image_features, **kwargs):
@@ -93,10 +100,13 @@ class MockKerasBoxPredictor(box_predictor.KerasBoxPredictor):
num_anchors = (combined_feature_shape[1] * combined_feature_shape[2])
code_size = 4
zero = tf.reduce_sum(0 * image_feature)
num_class_slots = self.num_classes
if self._add_background_class:
num_class_slots = num_class_slots + 1
box_encodings = zero + tf.zeros(
(batch_size, num_anchors, 1, code_size), dtype=tf.float32)
class_predictions_with_background = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
(batch_size, num_anchors, num_class_slots), dtype=tf.float32)
masks = zero + tf.zeros(
(batch_size, num_anchors, self.num_classes, DEFAULT_MASK_SIZE,
DEFAULT_MASK_SIZE),
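A quick sketch of what the new add_background_class switch changes in these mocks (illustrative usage only, assuming the surrounding test-utils definitions are in scope):

# With num_classes=3, the default mock emits class predictions with
# num_classes + 1 = 4 slots (background plus classes); disabling the
# background class drops that to 3 slots.
mock_predictor = MockBoxPredictor(
    is_training=False, num_classes=3, add_background_class=False)
# class_predictions_with_background returned by _predict now has shape
# [batch_size, num_anchors, 3] rather than [batch_size, num_anchors, 4].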