updates changes in object_detecion/cores directory.

e7de233b · Vivek Rathod · edcd29f2 · e7de233b · e7de233b · e7de233b
Commit e7de233b authored Oct 27, 2017 by Vivek Rathod
14 changed files
--- a/research/object_detection/core/BUILD
+++ b/research/object_detection/core/BUILD
@@ -264,6 +264,11 @@ py_library(
    srcs = ["data_decoder.py"],
 )

+py_library(
+    name = "data_parser",
+    srcs = ["data_parser.py"],
+)
+
 py_library(
    name = "box_predictor",
    srcs = ["box_predictor.py"],

--- a/research/object_detection/core/box_list_ops.py
+++ b/research/object_detection/core/box_list_ops.py
@@ -584,7 +584,8 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
        ['Incorrect field size: actual vs expected.', num_entries, num_boxes])

    with tf.control_dependencies([length_assert]):
-      # TODO: Remove with tf.device when top_k operation runs correctly on GPU.
+      # TODO: Remove with tf.device when top_k operation runs
+      # correctly on GPU.
      with tf.device('/cpu:0'):
        _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)

@@ -655,7 +656,7 @@ def filter_greater_than(boxlist, thresh, scope=None):
  This op keeps the collection of boxes whose corresponding scores are
  greater than the input threshold.

-  TODO: Change function name to FilterScoresGreaterThan
+  TODO: Change function name to filter_scores_greater_than

  Args:
    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
@@ -772,18 +773,25 @@ def to_normalized_coordinates(boxlist, height, width,
    return scale(boxlist, 1 / height, 1 / width)


-def to_absolute_coordinates(boxlist, height, width,
-                            check_range=True, scope=None):
+def to_absolute_coordinates(boxlist,
+                            height,
+                            width,
+                            check_range=True,
+                            maximum_normalized_coordinate=1.01,
+                            scope=None):
  """Converts normalized box coordinates to absolute pixel coordinates.

  This function raises an assertion failed error when the maximum box coordinate
-  value is larger than 1.01 (in which case coordinates are already absolute).
+  value is larger than maximum_normalized_coordinate (in which case coordinates
+  are already absolute).

  Args:
    boxlist: BoxList with coordinates in range [0, 1].
    height: Maximum value for height of absolute box coordinates.
    width: Maximum value for width of absolute box coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
+    maximum_normalized_coordinate: Maximum coordinate value to be considered
+      as normalized, default to 1.01.
    scope: name scope.

  Returns:
@@ -797,9 +805,10 @@ def to_absolute_coordinates(boxlist, height, width,
    # Ensure range of input boxes is correct.
    if check_range:
      box_maximum = tf.reduce_max(boxlist.get())
-      max_assert = tf.Assert(tf.greater_equal(1.01, box_maximum),
+      max_assert = tf.Assert(
+          tf.greater_equal(maximum_normalized_coordinate, box_maximum),
          ['maximum box coordinate value is larger '
-                              'than 1.01: ', box_maximum])
+           'than %f: ' % maximum_normalized_coordinate, box_maximum])
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)

@@ -927,9 +936,9 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
  iou_ = iou(selected_boxes, pool_boxes)
  match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
  num_matches = tf.reduce_sum(match_indicator, 1)
-  # TODO: Handle the case where some boxes in selected_boxes do not match to any
-  # boxes in pool_boxes. For such boxes without any matches, we should return
-  # the original boxes without voting.
+  # TODO: Handle the case where some boxes in selected_boxes do not
+  # match to any boxes in pool_boxes. For such boxes without any matches, we
+  # should return the original boxes without voting.
  match_assert = tf.Assert(
      tf.reduce_all(tf.greater(num_matches, 0)),
      ['Each box in selected_boxes must match with at least one box '

--- a/research/object_detection/core/box_predictor.py
+++ b/research/object_detection/core/box_predictor.py
@@ -278,6 +278,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
               box_code_size,
               conv_hyperparams=None,
               predict_instance_masks=False,
+               mask_height=14,
+               mask_width=14,
               mask_prediction_conv_depth=256,
               predict_keypoints=False):
    """Constructor.
@@ -300,6 +302,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
        ops.
      predict_instance_masks: Whether to predict object masks inside detection
        boxes.
+      mask_height: Desired output mask height. The default value is 14.
+      mask_width: Desired output mask width. The default value is 14.
      mask_prediction_conv_depth: The depth for the first conv2d_transpose op
        applied to the image_features in the mask prediciton branch.
      predict_keypoints: Whether to predict keypoints insde detection boxes.
@@ -315,10 +319,10 @@ class MaskRCNNBoxPredictor(BoxPredictor):
    self._dropout_keep_prob = dropout_keep_prob
    self._conv_hyperparams = conv_hyperparams
    self._predict_instance_masks = predict_instance_masks
+    self._mask_height = mask_height
+    self._mask_width = mask_width
    self._mask_prediction_conv_depth = mask_prediction_conv_depth
    self._predict_keypoints = predict_keypoints
-    if self._predict_instance_masks:
-      raise ValueError('Mask prediction is unimplemented.')
    if self._predict_keypoints:
      raise ValueError('Keypoint prediction is unimplemented.')
    if ((self._predict_instance_masks or self._predict_keypoints) and
@@ -339,6 +343,11 @@ class MaskRCNNBoxPredictor(BoxPredictor):
    have been folded into the batch dimension.  Thus we output 1 for the
    anchors dimension.

+    Also optionally predicts instance masks.
+    The mask prediction head is based on the Mask RCNN paper with the following
+    modifications: We replace the deconvolution layer with a bilinear resize
+    and a convolution.
+
    Args:
      image_features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
@@ -397,15 +406,18 @@ class MaskRCNNBoxPredictor(BoxPredictor):

    if self._predict_instance_masks:
      with slim.arg_scope(self._conv_hyperparams):
-        upsampled_features = slim.conv2d_transpose(
+        upsampled_features = tf.image.resize_bilinear(
            image_features,
+            [self._mask_height, self._mask_width],
+            align_corners=True)
+        upsampled_features = slim.conv2d(
+            upsampled_features,
            num_outputs=self._mask_prediction_conv_depth,
-            kernel_size=[2, 2],
-            stride=2)
+            kernel_size=[2, 2])
        mask_predictions = slim.conv2d(upsampled_features,
                                       num_outputs=self.num_classes,
                                       activation_fn=None,
-                                       kernel_size=[1, 1])
+                                       kernel_size=[3, 3])
        instance_masks = tf.expand_dims(tf.transpose(mask_predictions,
                                                     perm=[0, 3, 1, 2]),
                                        axis=1,
@@ -437,7 +449,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
               dropout_keep_prob,
               kernel_size,
               box_code_size,
-               apply_sigmoid_to_scores=False):
+               apply_sigmoid_to_scores=False,
+               class_prediction_bias_init=0.0):
    """Constructor.

    Args:
@@ -464,6 +477,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
      box_code_size: Size of encoding for each box.
      apply_sigmoid_to_scores: if True, apply the sigmoid on the output
        class_predictions.
+      class_prediction_bias_init: constant value to initialize bias of the last
+        conv2d layer before class prediction.

    Raises:
      ValueError: if min_depth > max_depth.
@@ -480,6 +495,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
    self._box_code_size = box_code_size
    self._dropout_keep_prob = dropout_keep_prob
    self._apply_sigmoid_to_scores = apply_sigmoid_to_scores
+    self._class_prediction_bias_init = class_prediction_bias_init

  def _predict(self, image_features, num_predictions_per_location):
    """Computes encoded object locations and corresponding confidences.
@@ -499,15 +515,16 @@ class ConvolutionalBoxPredictor(BoxPredictor):
          [batch_size, num_anchors, num_classes + 1] representing the class
          predictions for the proposals.
    """
-    features_depth = static_shape.get_depth(image_features.get_shape())
-    depth = max(min(features_depth, self._max_depth), self._min_depth)
-
    # Add a slot for the background class.
    num_class_slots = self.num_classes + 1
    net = image_features
    with slim.arg_scope(self._conv_hyperparams), \
         slim.arg_scope([slim.dropout], is_training=self._is_training):
-      # Add additional conv layers before the predictor.
+      # Add additional conv layers before the class predictor.
+      features_depth = static_shape.get_depth(image_features.get_shape())
+      depth = max(min(features_depth, self._max_depth), self._min_depth)
+      tf.logging.info('depth of additional conv before box predictor: {}'.
+                      format(depth))
      if depth > 0 and self._num_layers_before_predictor > 0:
        for i in range(self._num_layers_before_predictor):
          net = slim.conv2d(
@@ -522,7 +539,9 @@ class ConvolutionalBoxPredictor(BoxPredictor):
          net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
        class_predictions_with_background = slim.conv2d(
            net, num_predictions_per_location * num_class_slots,
-            [self._kernel_size, self._kernel_size], scope='ClassPredictor')
+            [self._kernel_size, self._kernel_size], scope='ClassPredictor',
+            biases_initializer=tf.constant_initializer(
+                self._class_prediction_bias_init))
        if self._apply_sigmoid_to_scores:
          class_predictions_with_background = tf.sigmoid(
              class_predictions_with_background)

--- a/research/object_detection/core/data_parser.py
+++ b/research/object_detection/core/data_parser.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Interface for data parsers.
+
+Data parser parses input data and returns a dictionary of numpy arrays
+keyed by the entries in standard_fields.py. Since the parser parses records
+to numpy arrays (materialized tensors) directly, it is used to read data for
+evaluation/visualization; to parse the data during training, DataDecoder should
+be used.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+
+class DataToNumpyParser(object):
+  __metaclass__ = ABCMeta
+
+  @abstractmethod
+  def parse(self, input_data):
+    """Parses input and returns a numpy array or a dictionary of numpy arrays.
+
+    Args:
+      input_data: an input data
+
+    Returns:
+      A numpy array or a dictionary of numpy arrays or None, if input
+      cannot be parsed.
+    """
+    pass
--- a/research/object_detection/core/keypoint_ops.py
+++ b/research/object_detection/core/keypoint_ops.py
@@ -229,3 +229,54 @@ def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
    new_keypoints = tf.concat([v, u], 2)
    new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
    return new_keypoints
+
+
+def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
+  """Flips the keypoints vertically around the flip_point.
+
+  This operation flips the y coordinate for each keypoint around the flip_point
+  and also permutes the keypoints in a manner specified by flip_permutation.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    flip_point:  (float) scalar tensor representing the y coordinate to flip the
+      keypoints around.
+    flip_permutation: rank 1 int32 tensor containing the keypoint flip
+      permutation. This specifies the mapping from original keypoint indices
+      to the flipped keypoint indices. This is used primarily for keypoints
+      that are not reflection invariant. E.g. Suppose there are 3 keypoints
+      representing ['head', 'right_eye', 'left_eye'], then a logical choice for
+      flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
+      and 'right_eye' after a horizontal flip.
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'FlipVertical'):
+    keypoints = tf.transpose(keypoints, [1, 0, 2])
+    keypoints = tf.gather(keypoints, flip_permutation)
+    v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+    v = flip_point * 2.0 - v
+    new_keypoints = tf.concat([v, u], 2)
+    new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
+    return new_keypoints
+
+
+def rot90(keypoints, scope=None):
+  """Rotates the keypoints counter-clockwise by 90 degrees.
+
+  Args:
+    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+    scope: name scope.
+
+  Returns:
+    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+  """
+  with tf.name_scope(scope, 'Rot90'):
+    keypoints = tf.transpose(keypoints, [1, 0, 2])
+    v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2)
+    v = 1.0 - v
+    new_keypoints = tf.concat([v, u], 2)
+    new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
+    return new_keypoints
--- a/research/object_detection/core/keypoint_ops_test.py
+++ b/research/object_detection/core/keypoint_ops_test.py
@@ -163,6 +163,38 @@ class KeypointOpsTest(tf.test.TestCase):
      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
      self.assertAllClose(output_, expected_keypoints_)

+  def test_flip_vertical(self):
+    keypoints = tf.constant([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
+    ])
+    flip_permutation = [0, 2, 1]
+
+    expected_keypoints = tf.constant([
+        [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]],
+        [[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]],
+    ])
+    output = keypoint_ops.flip_vertical(keypoints, 0.5, flip_permutation)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+
+  def test_rot90(self):
+    keypoints = tf.constant([
+        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+        [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]]
+    ])
+    expected_keypoints = tf.constant([
+        [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]],
+        [[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]],
+    ])
+    output = keypoint_ops.rot90(keypoints)
+
+    with self.test_session() as sess:
+      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+      self.assertAllClose(output_, expected_keypoints_)
+

 if __name__ == '__main__':
  tf.test.main()
--- a/research/object_detection/core/losses.py
+++ b/research/object_detection/core/losses.py
@@ -72,7 +72,7 @@ class Loss(object):

  @abstractmethod
  def _compute_loss(self, prediction_tensor, target_tensor, **params):
-    """Method to be overriden by implementations.
+    """Method to be overridden by implementations.

    Args:
      prediction_tensor: a tensor representing predicted quantities
@@ -238,17 +238,85 @@ class WeightedSigmoidClassificationLoss(Loss):
    return tf.reduce_sum(per_entry_cross_ent * weights)


+class SigmoidFocalClassificationLoss(Loss):
+  """Sigmoid focal cross entropy loss.
+
+  Focal loss down-weights well classified examples and focusses on the hard
+  examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
+  """
+
+  def __init__(self, anchorwise_output=False, gamma=2.0, alpha=0.25):
+    """Constructor.
+
+    Args:
+      anchorwise_output: Outputs loss per anchor. (default False)
+      gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
+      alpha: optional alpha weighting factor to balance positives vs negatives.
+    """
+    self._anchorwise_output = anchorwise_output
+    self._alpha = alpha
+    self._gamma = gamma
+
+  def _compute_loss(self,
+                    prediction_tensor,
+                    target_tensor,
+                    weights,
+                    class_indices=None):
+    """Compute loss function.
+
+    Args:
+      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing the predicted logits for each class
+      target_tensor: A float tensor of shape [batch_size, num_anchors,
+        num_classes] representing one-hot encoded classification targets
+      weights: a float tensor of shape [batch_size, num_anchors]
+      class_indices: (Optional) A 1-D integer tensor of class indices.
+        If provided, computes loss only for the specified class indices.
+
+    Returns:
+      loss: a (scalar) tensor representing the value of the loss function
+            or a float tensor of shape [batch_size, num_anchors]
+    """
+    weights = tf.expand_dims(weights, 2)
+    if class_indices is not None:
+      weights *= tf.reshape(
+          ops.indices_to_dense_vector(class_indices,
+                                      tf.shape(prediction_tensor)[2]),
+          [1, 1, -1])
+    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+        labels=target_tensor, logits=prediction_tensor))
+    prediction_probabilities = tf.sigmoid(prediction_tensor)
+    p_t = ((target_tensor * prediction_probabilities) +
+           ((1 - target_tensor) * (1 - prediction_probabilities)))
+    modulating_factor = 1.0
+    if self._gamma:
+      modulating_factor = tf.pow(1.0 - p_t, self._gamma)
+    alpha_weight_factor = 1.0
+    if self._alpha is not None:
+      alpha_weight_factor = (target_tensor * self._alpha +
+                             (1 - target_tensor) * (1 - self._alpha))
+    focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
+                                per_entry_cross_ent)
+    if self._anchorwise_output:
+      return tf.reduce_sum(focal_cross_entropy_loss * weights, 2)
+    return tf.reduce_sum(focal_cross_entropy_loss * weights)
+
+
 class WeightedSoftmaxClassificationLoss(Loss):
  """Softmax loss function."""

-  def __init__(self, anchorwise_output=False):
+  def __init__(self, anchorwise_output=False, logit_scale=1.0):
    """Constructor.

    Args:
      anchorwise_output: Whether to output loss per anchor (default False)
+      logit_scale: When this value is high, the prediction is "diffused" and
+                   when this value is low, the prediction is made peakier.
+                   (default 1.0)

    """
    self._anchorwise_output = anchorwise_output
+    self._logit_scale = logit_scale

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.
@@ -264,6 +332,8 @@ class WeightedSoftmaxClassificationLoss(Loss):
      loss: a (scalar) tensor representing the value of the loss function
    """
    num_classes = prediction_tensor.get_shape().as_list()[-1]
+    prediction_tensor = tf.divide(
+        prediction_tensor, self._logit_scale, name='scale_logit')
    per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.reshape(target_tensor, [-1, num_classes]),
        logits=tf.reshape(prediction_tensor, [-1, num_classes])))

--- a/research/object_detection/core/losses_test.py
+++ b/research/object_detection/core/losses_test.py
@@ -225,6 +225,286 @@ class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
      self.assertAllClose(loss_output, exp_loss)


+def _logit(probability):
+  return math.log(probability / (1. - probability))
+
+
+class SigmoidFocalClassificationLossTest(tf.test.TestCase):
+
+  def testEasyExamplesProduceSmallLossComparedToSigmoidXEntropy(self):
+    prediction_tensor = tf.constant([[[_logit(0.97)],
+                                      [_logit(0.90)],
+                                      [_logit(0.73)],
+                                      [_logit(0.27)],
+                                      [_logit(0.09)],
+                                      [_logit(0.03)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, gamma=2.0, alpha=None)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss / focal_loss)))
+      self.assertAllClose(order_of_ratio, [[1000, 100, 10, 10, 100, 1000]])
+
+  def testHardExamplesProduceLossComparableToSigmoidXEntropy(self):
+    prediction_tensor = tf.constant([[[_logit(0.55)],
+                                      [_logit(0.52)],
+                                      [_logit(0.50)],
+                                      [_logit(0.48)],
+                                      [_logit(0.45)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, gamma=2.0, alpha=None)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss / focal_loss)))
+      self.assertAllClose(order_of_ratio, [[1., 1., 1., 1., 1.]])
+
+  def testNonAnchorWiseOutputComparableToSigmoidXEntropy(self):
+    prediction_tensor = tf.constant([[[_logit(0.55)],
+                                      [_logit(0.52)],
+                                      [_logit(0.50)],
+                                      [_logit(0.48)],
+                                      [_logit(0.45)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=False, gamma=2.0, alpha=None)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=False)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss / focal_loss)))
+      self.assertAlmostEqual(order_of_ratio, 1.)
+
+  def testIgnoreNegativeExampleLossViaAlphaMultiplier(self):
+    prediction_tensor = tf.constant([[[_logit(0.55)],
+                                      [_logit(0.52)],
+                                      [_logit(0.50)],
+                                      [_logit(0.48)],
+                                      [_logit(0.45)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, gamma=2.0, alpha=1.0)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      self.assertAllClose(focal_loss[0][3:], [0., 0.])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss[0][:3] /
+                                                  focal_loss[0][:3])))
+      self.assertAllClose(order_of_ratio, [1., 1., 1.])
+
+  def testIgnorePositiveExampleLossViaAlphaMultiplier(self):
+    prediction_tensor = tf.constant([[[_logit(0.55)],
+                                      [_logit(0.52)],
+                                      [_logit(0.50)],
+                                      [_logit(0.48)],
+                                      [_logit(0.45)]]], tf.float32)
+    target_tensor = tf.constant([[[1],
+                                  [1],
+                                  [1],
+                                  [0],
+                                  [0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, gamma=2.0, alpha=0.0)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      self.assertAllClose(focal_loss[0][:3], [0., 0., 0.])
+      order_of_ratio = np.power(10,
+                                np.floor(np.log10(sigmoid_loss[0][3:] /
+                                                  focal_loss[0][3:])))
+      self.assertAllClose(order_of_ratio, [1., 1.])
+
+  def testSimilarToSigmoidXEntropyWithHalfAlphaAndZeroGammaUpToAScale(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [100, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 100],
+                                      [-100, 100, -100],
+                                      [100, 100, 100],
+                                      [0, 0, -1]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 1, 1],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, alpha=0.5, gamma=0.0)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      self.assertAllClose(sigmoid_loss, focal_loss * 2)
+
+  def testSameAsSigmoidXEntropyWithNoAlphaAndZeroGamma(self):
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [100, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 100],
+                                      [-100, 100, -100],
+                                      [100, 100, 100],
+                                      [0, 0, -1]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 1, 1],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 0]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=True, alpha=None, gamma=0.0)
+    sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
+        anchorwise_output=True)
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    sigmoid_loss = sigmoid_loss_op(prediction_tensor, target_tensor,
+                                   weights=weights)
+
+    with self.test_session() as sess:
+      sigmoid_loss, focal_loss = sess.run([sigmoid_loss, focal_loss])
+      self.assertAllClose(sigmoid_loss, focal_loss)
+
+  def testExpectedLossWithAlphaOneAndZeroGamma(self):
+    # All zeros correspond to 0.5 probability.
+    prediction_tensor = tf.constant([[[0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0]],
+                                     [[0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=False, alpha=1.0, gamma=0.0)
+
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    with self.test_session() as sess:
+      focal_loss = sess.run(focal_loss)
+      self.assertAllClose(
+          (-math.log(.5) *  # x-entropy per class per anchor
+           1.0 *            # alpha
+           8),              # positives from 8 anchors
+          focal_loss)
+
+  def testExpectedLossWithAlpha75AndZeroGamma(self):
+    # All zeros correspond to 0.5 probability.
+    prediction_tensor = tf.constant([[[0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0]],
+                                     [[0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0],
+                                      [0, 0, 0]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 1]], tf.float32)
+    focal_loss_op = losses.SigmoidFocalClassificationLoss(
+        anchorwise_output=False, alpha=0.75, gamma=0.0)
+
+    focal_loss = focal_loss_op(prediction_tensor, target_tensor,
+                               weights=weights)
+    with self.test_session() as sess:
+      focal_loss = sess.run(focal_loss)
+      self.assertAllClose(
+          (-math.log(.5) *  # x-entropy per class per anchor.
+           ((0.75 *         # alpha for positives.
+             8) +           # positives from 8 anchors.
+            (0.25 *         # alpha for negatives.
+             8 * 2))),      # negatives from 8 anchors for two classes.
+          focal_loss)
+
+
 class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):

  def testReturnsCorrectLoss(self):
@@ -282,6 +562,39 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

+  def testReturnsCorrectAnchorWiseLossWithHighLogitScaleSetting(self):
+    """At very high logit_scale, all predictions will be ~0.33."""
+    # TODO(yonib): Also test logit_scale with anchorwise=False.
+    logit_scale = 10e16
+    prediction_tensor = tf.constant([[[-100, 100, -100],
+                                      [100, -100, -100],
+                                      [0, 0, -100],
+                                      [-100, -100, 100]],
+                                     [[-100, 0, 0],
+                                      [-100, 100, -100],
+                                      [-100, 100, -100],
+                                      [100, -100, -100]]], tf.float32)
+    target_tensor = tf.constant([[[0, 1, 0],
+                                  [1, 0, 0],
+                                  [1, 0, 0],
+                                  [0, 0, 1]],
+                                 [[0, 0, 1],
+                                  [0, 1, 0],
+                                  [0, 1, 0],
+                                  [1, 0, 0]]], tf.float32)
+    weights = tf.constant([[1, 1, 1, 1],
+                           [1, 1, 1, 1]], tf.float32)
+    loss_op = losses.WeightedSoftmaxClassificationLoss(
+        anchorwise_output=True, logit_scale=logit_scale)
+    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
+
+    uniform_distribution_loss = - math.log(.33333333333)
+    exp_loss = np.matrix([[uniform_distribution_loss] * 4,
+                          [uniform_distribution_loss] * 4])
+    with self.test_session() as sess:
+      loss_output = sess.run(loss)
+      self.assertAllClose(loss_output, exp_loss)
+

 class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):


--- a/research/object_detection/core/model.py
+++ b/research/object_detection/core/model.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Abstract detection model.

 This file defines a generic base class for detection models.  Programs that are
@@ -87,6 +86,18 @@ class DetectionModel(object):
      raise RuntimeError('Groundtruth tensor %s has not been provided', field)
    return self._groundtruth_lists[field]

+  def groundtruth_has_field(self, field):
+    """Determines whether the groundtruth includes the given field.
+
+    Args:
+      field: a string key, options are
+        fields.BoxListFields.{boxes,classes,masks,keypoints}
+
+    Returns:
+      True if the groundtruth includes the given field, False otherwise.
+    """
+    return field in self._groundtruth_lists
+
  @abstractmethod
  def preprocess(self, inputs):
    """Input preprocessing.
@@ -148,7 +159,8 @@ class DetectionModel(object):

    Outputs adhere to the following conventions:
    * Classes are integers in [0, num_classes); background classes are removed
-      and the first non-background class is mapped to 0.
+      and the first non-background class is mapped to 0. If the model produces
+      class-agnostic detections, then no output is produced for classes.
    * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
      format and normalized relative to the image window.
    * `num_detections` is provided for settings where detections are padded to a
@@ -168,6 +180,8 @@ class DetectionModel(object):
        detection_boxes: [batch, max_detections, 4]
        detection_scores: [batch, max_detections]
        detection_classes: [batch, max_detections]
+          (If a model is producing class-agnostic detections, this field may be
+          missing)
        instance_masks: [batch, max_detections, image_height, image_width]
          (optional)
        keypoints: [batch, max_detections, num_keypoints, 2] (optional)
@@ -207,13 +221,13 @@ class DetectionModel(object):
      groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
        tensors of shape [num_boxes, num_classes] containing the class targets
        with the 0th index assumed to map to the first non-background class.
-      groundtruth_masks_list: a list of 2-D tf.float32 tensors of
-        shape [max_detections, height_in, width_in] containing instance
+      groundtruth_masks_list: a list of 3-D tf.float32 tensors of
+        shape [num_boxes, height_in, width_in] containing instance
        masks with values in {0, 1}.  If None, no masks are provided.
        Mask resolution `height_in`x`width_in` must agree with the resolution
        of the input image tensor provided to the `preprocess` function.
-      groundtruth_keypoints_list: a list of 2-D tf.float32 tensors of
-        shape [batch, max_detections, num_keypoints, 2] containing keypoints.
+      groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
+        shape [num_boxes, num_keypoints, 2] containing keypoints.
        Keypoints are assumed to be provided in normalized coordinates and
        missing keypoints should be encoded as NaN.
    """

--- a/research/object_detection/core/preprocessor.py
+++ b/research/object_detection/core/preprocessor.py
--- a/research/object_detection/core/preprocessor_test.py
+++ b/research/object_detection/core/preprocessor_test.py
--- a/research/object_detection/core/standard_fields.py
+++ b/research/object_detection/core/standard_fields.py
@@ -18,6 +18,7 @@

 Specifies:
  InputDataFields: standard fields used by reader/preprocessor/batcher.
+  DetectionResultFields: standard fields returned by object detector.
  BoxListFields: standard field used by BoxList
  TfExampleFields: standard fields for tf-example data format (go/tf-example).
 """
@@ -41,12 +42,17 @@ class InputDataFields(object):
    groundtruth_boxes: coordinates of the ground truth boxes in the image.
    groundtruth_classes: box-level class labels.
    groundtruth_label_types: box-level label types (e.g. explicit negative).
-    groundtruth_is_crowd: is the groundtruth a single object or a crowd.
+    groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead]
+      is the groundtruth a single object or a crowd.
    groundtruth_area: area of a groundtruth segment.
    groundtruth_difficult: is a `difficult` object
+    groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the
+      same class, forming a connected group, where instances are heavily
+      occluding each other.
    proposal_boxes: coordinates of object proposal boxes.
    proposal_objectness: objectness score of each proposal.
    groundtruth_instance_masks: ground truth instance masks.
+    groundtruth_instance_boundaries: ground truth instance boundaries.
    groundtruth_instance_classes: instance mask-level class labels.
    groundtruth_keypoints: ground truth keypoints.
    groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
@@ -64,15 +70,43 @@ class InputDataFields(object):
  groundtruth_is_crowd = 'groundtruth_is_crowd'
  groundtruth_area = 'groundtruth_area'
  groundtruth_difficult = 'groundtruth_difficult'
+  groundtruth_group_of = 'groundtruth_group_of'
  proposal_boxes = 'proposal_boxes'
  proposal_objectness = 'proposal_objectness'
  groundtruth_instance_masks = 'groundtruth_instance_masks'
+  groundtruth_instance_boundaries = 'groundtruth_instance_boundaries'
  groundtruth_instance_classes = 'groundtruth_instance_classes'
  groundtruth_keypoints = 'groundtruth_keypoints'
  groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
  groundtruth_label_scores = 'groundtruth_label_scores'


+class DetectionResultFields(object):
+  """Naming converntions for storing the output of the detector.
+
+  Attributes:
+    source_id: source of the original image.
+    key: unique key corresponding to image.
+    detection_boxes: coordinates of the detection boxes in the image.
+    detection_scores: detection scores for the detection boxes in the image.
+    detection_classes: detection-level class labels.
+    detection_masks: contains a segmentation mask for each detection box.
+    detection_boundaries: contains an object boundary for each detection box.
+    detection_keypoints: contains detection keypoints for each detection box.
+    num_detections: number of detections in the batch.
+  """
+
+  source_id = 'source_id'
+  key = 'key'
+  detection_boxes = 'detection_boxes'
+  detection_scores = 'detection_scores'
+  detection_classes = 'detection_classes'
+  detection_masks = 'detection_masks'
+  detection_boundaries = 'detection_boundaries'
+  detection_keypoints = 'detection_keypoints'
+  num_detections = 'num_detections'
+
+
 class BoxListFields(object):
  """Naming conventions for BoxLists.

@@ -83,6 +117,7 @@ class BoxListFields(object):
    weights: sample weights per bounding box.
    objectness: objectness score per bounding box.
    masks: masks per bounding box.
+    boundaries: boundaries per bounding box.
    keypoints: keypoints per bounding box.
    keypoint_heatmaps: keypoint heatmaps per bounding box.
  """
@@ -92,6 +127,7 @@ class BoxListFields(object):
  weights = 'weights'
  objectness = 'objectness'
  masks = 'masks'
+  boundaries = 'boundaries'
  keypoints = 'keypoints'
  keypoint_heatmaps = 'keypoint_heatmaps'

@@ -112,7 +148,7 @@ class TfExampleFields(object):
    width: width of image in pixels, e.g. 581
    source_id: original source of the image
    object_class_text: labels in text format, e.g. ["person", "cat"]
-    object_class_text: labels in numbers, e.g. [16, 8]
+    object_class_label: labels in numbers, e.g. [16, 8]
    object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
    object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
    object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
@@ -121,10 +157,20 @@ class TfExampleFields(object):
    object_truncated: is object truncated, e.g. [true, false]
    object_occluded: is object occluded, e.g. [true, false]
    object_difficult: is object difficult, e.g. [true, false]
-    object_is_crowd: is the object a single object or a crowd
+    object_group_of: is object a single object or a group of objects
+    object_depiction: is object a depiction
+    object_is_crowd: [DEPRECATED, use object_group_of instead]
+      is the object a single object or a crowd
    object_segment_area: the area of the segment.
    instance_masks: instance segmentation masks.
+    instance_boundaries: instance boundaries.
    instance_classes: Classes for each instance segmentation mask.
+    detection_class_label: class label in numbers.
+    detection_bbox_ymin: ymin coordinates of a detection box.
+    detection_bbox_xmin: xmin coordinates of a detection box.
+    detection_bbox_ymax: ymax coordinates of a detection box.
+    detection_bbox_xmax: xmax coordinates of a detection box.
+    detection_score: detection score for the class label and box.
  """
  image_encoded = 'image/encoded'
  image_format = 'image/format'  # format is reserved keyword
@@ -144,7 +190,16 @@ class TfExampleFields(object):
  object_truncated = 'image/object/truncated'
  object_occluded = 'image/object/occluded'
  object_difficult = 'image/object/difficult'
+  object_group_of = 'image/object/group_of'
+  object_depiction = 'image/object/depiction'
  object_is_crowd = 'image/object/is_crowd'
  object_segment_area = 'image/object/segment/area'
  instance_masks = 'image/segmentation/object'
+  instance_boundaries = 'image/boundaries/object'
  instance_classes = 'image/segmentation/object/class'
+  detection_class_label = 'image/detection/label'
+  detection_bbox_ymin = 'image/detection/bbox/ymin'
+  detection_bbox_xmin = 'image/detection/bbox/xmin'
+  detection_bbox_ymax = 'image/detection/bbox/ymax'
+  detection_bbox_xmax = 'image/detection/bbox/xmax'
+  detection_score = 'image/detection/score'
--- a/research/object_detection/core/target_assigner.py
+++ b/research/object_detection/core/target_assigner.py
@@ -50,7 +50,7 @@ class TargetAssigner(object):
  def __init__(self, similarity_calc, matcher, box_coder,
               positive_class_weight=1.0, negative_class_weight=1.0,
               unmatched_cls_target=None):
-    """Construct Multibox Target Assigner.
+    """Construct Object Detection Target Assigner.

    Args:
      similarity_calc: a RegionSimilarityCalculator
@@ -108,7 +108,7 @@ class TargetAssigner(object):
    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth boxes
-      groundtruth_labels:  a tensor of shape [num_gt_boxes, d_1, ... d_k]
+      groundtruth_labels:  a tensor of shape [M, d_1, ... d_k]
        with labels for each of the ground_truth boxes. The subshape
        [d_1, ... d_k] can be empty (corresponding to scalar inputs).  When set
        to None, groundtruth_labels assumes a binary problem where all
@@ -140,10 +140,16 @@ class TargetAssigner(object):
      groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
                                                  0))
      groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
-    shape_assert = tf.assert_equal(tf.shape(groundtruth_labels)[1:],
-                                   tf.shape(self._unmatched_cls_target))
-
-    with tf.control_dependencies([shape_assert]):
+    unmatched_shape_assert = tf.assert_equal(
+        tf.shape(groundtruth_labels)[1:], tf.shape(self._unmatched_cls_target),
+        message='Unmatched class target shape incompatible '
+        'with groundtruth labels shape!')
+    labels_and_box_shapes_assert = tf.assert_equal(
+        tf.shape(groundtruth_labels)[0], groundtruth_boxes.num_boxes(),
+        message='Groundtruth boxes and labels have incompatible shapes!')
+
+    with tf.control_dependencies(
+        [unmatched_shape_assert, labels_and_box_shapes_assert]):
      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                           anchors)
      match = self._matcher.match(match_quality_matrix, **params)
@@ -316,8 +322,8 @@ class TargetAssigner(object):
    return self._box_coder


-# TODO: This method pulls in all the implementation dependencies into core.
-# Therefore its best to have this factory method outside of core.
+# TODO: This method pulls in all the implementation dependencies into
+# core. Therefore its best to have this factory method outside of core.
 def create_target_assigner(reference, stage=None,
                           positive_class_weight=1.0,
                           negative_class_weight=1.0,

--- a/research/object_detection/core/target_assigner_test.py
+++ b/research/object_detection/core/target_assigner_test.py
@@ -327,6 +327,41 @@ class TargetAssignerTest(tf.test.TestCase):
      self.assertEquals(reg_weights_out.dtype, np.float32)
      self.assertEquals(matching_anchors_out.dtype, np.int32)

+  def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
+    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+    matcher = bipartite_matcher.GreedyBipartiteMatcher()
+    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+    unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+    target_assigner = targetassigner.TargetAssigner(
+        similarity_calc, matcher, box_coder,
+        unmatched_cls_target=unmatched_cls_target)
+
+    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
+                               [0.5, 0.5, 1.0, 0.8],
+                               [0, 0.5, .5, 1.0],
+                               [.75, 0, 1.0, .25]])
+    prior_stddevs = tf.constant(4 * [4 * [.1]])
+    priors = box_list.BoxList(prior_means)
+    priors.add_field('stddev', prior_stddevs)
+
+    box_corners = [[0.0, 0.0, 0.5, 0.5],
+                   [0.0, 0.0, 0.5, 0.8],
+                   [0.5, 0.5, 0.9, 0.9],
+                   [.75, 0, .95, .27]]
+    boxes = box_list.BoxList(tf.constant(box_corners))
+
+    groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
+                                      [0, 0, 0, 0, 0, 1, 0],
+                                      [0, 0, 0, 1, 0, 0, 0]], tf.float32)
+    result = target_assigner.assign(priors, boxes, groundtruth_labels,
+                                    num_valid_rows=3)
+    (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+    with self.test_session() as sess:
+      with self.assertRaisesWithPredicateMatch(
+          tf.errors.InvalidArgumentError,
+          'Groundtruth boxes and labels have incompatible shapes!'):
+        sess.run([cls_targets, cls_weights, reg_targets, reg_weights])
+
  def test_raises_error_on_invalid_groundtruth_labels(self):
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
    matcher = bipartite_matcher.GreedyBipartiteMatcher()