Commit 12714f88 authored by Vivek Rathod

update post_processing module, builders, and meta architectures.

parent c46caa56
......@@ -28,8 +28,8 @@ def build(post_processing_config):
configuration.
Non-max suppression callable takes `boxes`, `scores`, and optionally
`clip_window`, `parallel_iterations` and `scope` as inputs. It returns
`nms_boxes`, `nms_scores`, `nms_nms_classes` and `num_detections`. See
`clip_window`, `parallel_iterations`, `masks`, and `scope` as inputs. It returns
`nms_boxes`, `nms_scores`, `nms_classes`, `nms_masks` and `num_detections`. See
post_processing.batch_multiclass_non_max_suppression for the type and shape
of these tensors.
......@@ -55,7 +55,8 @@ def build(post_processing_config):
non_max_suppressor_fn = _build_non_max_suppressor(
post_processing_config.batch_non_max_suppression)
score_converter_fn = _build_score_converter(
post_processing_config.score_converter)
post_processing_config.score_converter,
post_processing_config.logit_scale)
return non_max_suppressor_fn, score_converter_fn
......@@ -87,7 +88,17 @@ def _build_non_max_suppressor(nms_config):
return non_max_suppressor_fn
def _build_score_converter(score_converter_config):
def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale):
"""Create a function to scale logits then apply a Tensorflow function."""
def score_converter_fn(logits):
scaled_logits = tf.divide(logits, logit_scale, name='scale_logits')
return tf_score_converter_fn(scaled_logits, name='convert_scores')
score_converter_fn.__name__ = '%s_with_logit_scale' % (
tf_score_converter_fn.__name__)
return score_converter_fn
def _build_score_converter(score_converter_config, logit_scale):
"""Builds score converter based on the config.
Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on
......@@ -95,6 +106,7 @@ def _build_score_converter(score_converter_config):
Args:
score_converter_config: post_processing_pb2.PostProcessing.score_converter.
logit_scale: temperature to use for SOFTMAX score_converter.
Returns:
Callable score converter op.
......@@ -103,9 +115,9 @@ def _build_score_converter(score_converter_config):
ValueError: On unknown score converter.
"""
if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY:
return tf.identity
return _score_converter_fn_with_logit_scale(tf.identity, logit_scale)
if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID:
return tf.sigmoid
return _score_converter_fn_with_logit_scale(tf.sigmoid, logit_scale)
if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX:
return tf.nn.softmax
return _score_converter_fn_with_logit_scale(tf.nn.softmax, logit_scale)
raise ValueError('Unknown score converter.')
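For reference, here is a minimal standalone sketch (assuming a TensorFlow 1.x environment, as used by this commit; the helper name and values below are illustrative, not part of the change) of the temperature scaling that `_score_converter_fn_with_logit_scale` applies before handing logits to the TensorFlow score converter:

```python
import tensorflow as tf

def softmax_with_logit_scale(logits, logit_scale=2.0):
  # Divide logits by the temperature (logit_scale) first, then convert to
  # scores, mirroring _score_converter_fn_with_logit_scale above.
  scaled_logits = tf.divide(logits, logit_scale, name='scale_logits')
  return tf.nn.softmax(scaled_logits, name='convert_scores')

logits = tf.constant([[2.0, 0.0]])
scores = softmax_with_logit_scale(logits, logit_scale=2.0)
with tf.Session() as sess:
  # softmax([1.0, 0.0]) ~= [0.73, 0.27]; larger logit_scale values flatten
  # the score distribution, smaller values sharpen it.
  print(sess.run(scores))
```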
......@@ -48,7 +48,31 @@ class PostProcessingBuilderTest(tf.test.TestCase):
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.identity)
self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
inputs = tf.constant([1, 1], tf.float32)
outputs = score_converter(inputs)
with self.test_session() as sess:
converted_scores = sess.run(outputs)
expected_converted_scores = sess.run(inputs)
self.assertAllClose(converted_scores, expected_converted_scores)
def test_build_identity_score_converter_with_logit_scale(self):
post_processing_text_proto = """
score_converter: IDENTITY
logit_scale: 2.0
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
inputs = tf.constant([1, 1], tf.float32)
outputs = score_converter(inputs)
with self.test_session() as sess:
converted_scores = sess.run(outputs)
expected_converted_scores = sess.run(tf.constant([.5, .5], tf.float32))
self.assertAllClose(converted_scores, expected_converted_scores)
def test_build_sigmoid_score_converter(self):
post_processing_text_proto = """
......@@ -57,7 +81,7 @@ class PostProcessingBuilderTest(tf.test.TestCase):
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.sigmoid)
self.assertEqual(score_converter.__name__, 'sigmoid_with_logit_scale')
def test_build_softmax_score_converter(self):
post_processing_text_proto = """
......@@ -66,7 +90,17 @@ class PostProcessingBuilderTest(tf.test.TestCase):
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.nn.softmax)
self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
def test_build_softmax_score_converter_with_temperature(self):
post_processing_text_proto = """
score_converter: SOFTMAX
logit_scale: 2.0
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
if __name__ == '__main__':
......
......@@ -76,8 +76,6 @@ def multiclass_non_max_suppression(boxes,
a BoxList holding M boxes with a rank-1 scores field representing
corresponding scores for each box with scores sorted in decreasing order
and a rank-1 classes field representing a class label for each box.
If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
contain masks, keypoints, keypoint_heatmaps corresponding to boxes.
Raises:
ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
......@@ -174,6 +172,7 @@ def batch_multiclass_non_max_suppression(boxes,
change_coordinate_frame=False,
num_valid_boxes=None,
masks=None,
additional_fields=None,
scope=None,
parallel_iterations=32):
"""Multi-class version of non maximum suppression that operates on a batch.
......@@ -203,11 +202,13 @@ def batch_multiclass_non_max_suppression(boxes,
is provided)
num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
[batch_size] representing the number of valid boxes to be considered
for each image in the batch. This parameter allows for ignoring zero
paddings.
for each image in the batch. This parameter allows for ignoring zero
paddings.
masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
float32 tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose dimensions are [batch_size, num_anchors, ...].
scope: tf scope name.
parallel_iterations: (optional) number of batch items to process in
parallel.
......@@ -223,9 +224,13 @@ def batch_multiclass_non_max_suppression(boxes,
[batch_size, max_detections, mask_height, mask_width] float32 tensor
containing masks for each selected box. This is set to None if input
`masks` is None.
'nmsed_additional_fields': (optional) a dictionary of
[batch_size, max_detections, ...] float32 tensors corresponding to the
tensors specified in the input `additional_fields`. This is set to None
if input `additional_fields` is None.
'num_detections': A [batch_size] int32 tensor indicating the number of
valid detections per batch item. Only the top num_detections[i] entries in
nms_boxes[i], nms_scores[i] and nms_class[i] are valid. the rest of the
nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
entries are zero paddings.
Raises:
......@@ -239,6 +244,7 @@ def batch_multiclass_non_max_suppression(boxes,
'to the third dimension of scores')
original_masks = masks
original_additional_fields = additional_fields
with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
boxes_shape = boxes.shape
batch_size = boxes_shape[0].value
......@@ -255,15 +261,61 @@ def batch_multiclass_non_max_suppression(boxes,
num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors
# If masks aren't provided, create dummy masks so we can only have one copy
# of single_image_nms_fn and discard the dummy masks after map_fn.
# of _single_image_nms_fn and discard the dummy masks after map_fn.
if masks is None:
masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
masks = tf.zeros(masks_shape)
def single_image_nms_fn(args):
"""Runs NMS on a single image and returns padded output."""
(per_image_boxes, per_image_scores, per_image_masks,
per_image_num_valid_boxes) = args
if additional_fields is None:
additional_fields = {}
def _single_image_nms_fn(args):
"""Runs NMS on a single image and returns padded output.
Args:
args: A list of tensors consisting of the following:
per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
detections. If `q` is 1 then same boxes are used for all classes
otherwise, if `q` is equal to number of classes, class-specific
boxes are used.
per_image_scores - A [num_anchors, num_classes] float32 tensor
containing the scores for each of the `num_anchors` detections.
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
per_image_additional_fields - (optional) A variable number of float32
tensors each with size [num_anchors, ...].
per_image_num_valid_boxes - An int32 scalar representing the number of
valid boxes to be considered for this image. This parameter allows for
ignoring zero paddings.
Returns:
'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
non-max suppressed boxes.
'nmsed_scores': A [max_detections] float32 tensor containing the scores
for the boxes.
'nmsed_classes': A [max_detections] float32 tensor containing the class
for boxes.
'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
float32 tensor containing masks for each selected box. This is set to
None if input `masks` is None.
'nmsed_additional_fields': (optional) A variable number of float32
tensors each with size [max_detections, ...] corresponding to the
input `per_image_additional_fields`.
'num_detections': An int32 scalar indicating the number of valid
detections for this image. Only the top num_detections entries in
nmsed_boxes, nmsed_scores and nmsed_classes are valid. The rest of the
entries are zero paddings.
"""
per_image_boxes = args[0]
per_image_scores = args[1]
per_image_masks = args[2]
per_image_additional_fields = {
key: value
for key, value in zip(additional_fields, args[3:-1])
}
per_image_num_valid_boxes = args[-1]
per_image_boxes = tf.reshape(
tf.slice(per_image_boxes, 3 * [0],
tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])
......@@ -271,12 +323,21 @@ def batch_multiclass_non_max_suppression(boxes,
tf.slice(per_image_scores, [0, 0],
tf.stack([per_image_num_valid_boxes, -1])),
[-1, num_classes])
per_image_masks = tf.reshape(
tf.slice(per_image_masks, 4 * [0],
tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
[-1, q, per_image_masks.shape[2].value,
per_image_masks.shape[3].value])
if per_image_additional_fields is not None:
for key, tensor in per_image_additional_fields.items():
additional_field_shape = tensor.get_shape()
additional_field_dim = len(additional_field_shape)
per_image_additional_fields[key] = tf.reshape(
tf.slice(per_image_additional_fields[key],
additional_field_dim * [0],
tf.stack([per_image_num_valid_boxes] +
(additional_field_dim - 1) * [-1])),
[-1] + [dim.value for dim in additional_field_shape[1:]])
nmsed_boxlist = multiclass_non_max_suppression(
per_image_boxes,
per_image_scores,
......@@ -284,9 +345,10 @@ def batch_multiclass_non_max_suppression(boxes,
iou_thresh,
max_size_per_class,
max_total_size,
masks=per_image_masks,
clip_window=clip_window,
change_coordinate_frame=change_coordinate_frame)
change_coordinate_frame=change_coordinate_frame,
masks=per_image_masks,
additional_fields=per_image_additional_fields)
padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
max_total_size)
num_detections = nmsed_boxlist.num_boxes()
......@@ -294,19 +356,40 @@ def batch_multiclass_non_max_suppression(boxes,
nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores)
nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes)
nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
return [nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections]
nmsed_additional_fields = [
padded_boxlist.get_field(key) for key in per_image_additional_fields
]
return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
nmsed_additional_fields + [num_detections])
num_additional_fields = 0
if additional_fields is not None:
num_additional_fields = len(additional_fields)
num_nmsed_outputs = 4 + num_additional_fields
(batch_nmsed_boxes, batch_nmsed_scores,
batch_nmsed_classes, batch_nmsed_masks,
batch_num_detections) = tf.map_fn(
single_image_nms_fn,
elems=[boxes, scores, masks, num_valid_boxes],
dtype=[tf.float32, tf.float32, tf.float32, tf.float32, tf.int32],
parallel_iterations=parallel_iterations)
batch_outputs = tf.map_fn(
_single_image_nms_fn,
elems=([boxes, scores, masks] + list(additional_fields.values()) +
[num_valid_boxes]),
dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
parallel_iterations=parallel_iterations)
batch_nmsed_boxes = batch_outputs[0]
batch_nmsed_scores = batch_outputs[1]
batch_nmsed_classes = batch_outputs[2]
batch_nmsed_masks = batch_outputs[3]
batch_nmsed_additional_fields = {
key: value
for key, value in zip(additional_fields, batch_outputs[4:-1])
}
batch_num_detections = batch_outputs[-1]
if original_masks is None:
batch_nmsed_masks = None
if original_additional_fields is None:
batch_nmsed_additional_fields = None
return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
batch_nmsed_masks, batch_num_detections)
batch_nmsed_masks, batch_nmsed_additional_fields,
batch_num_detections)
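As a rough illustration of the pattern introduced above (a sketch only; tensor shapes and names are made up), the `additional_fields` dictionary is flattened into the positional `elems` list for `tf.map_fn`, and the per-image function rebuilds it by zipping the original keys against the matching slice of `args`:

```python
import tensorflow as tf

batch_size, num_anchors = 2, 3
boxes = tf.zeros([batch_size, num_anchors, 1, 4])
scores = tf.zeros([batch_size, num_anchors, 2])
additional_fields = {'keypoints': tf.zeros([batch_size, num_anchors, 5, 2])}

def per_image_fn(args):
  # args holds the per-image slices in the same order as `elems` below.
  per_image_boxes, per_image_scores = args[0], args[1]
  per_image_additional_fields = {
      key: value for key, value in zip(additional_fields, args[2:])}
  # ... per-image NMS would run here ...
  return ([per_image_boxes, per_image_scores] +
          list(per_image_additional_fields.values()))

outputs = tf.map_fn(
    per_image_fn,
    elems=[boxes, scores] + list(additional_fields.values()),
    dtype=[tf.float32, tf.float32, tf.float32],
    parallel_iterations=32)
# outputs[2:] line up with additional_fields.keys(), so the batched dictionary
# can be rebuilt the same way after map_fn returns.
```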
......@@ -497,11 +497,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
exp_nms_classes = [[0, 0, 1, 0]]
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size)
nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size)
self.assertIsNone(nmsed_masks)
self.assertIsNone(nmsed_additional_fields)
with self.test_session() as sess:
(nmsed_boxes, nmsed_scores, nmsed_classes,
......@@ -544,11 +546,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[1, 0, 0, 0]])
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size)
nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size)
self.assertIsNone(nmsed_masks)
self.assertIsNone(nmsed_additional_fields)
# Check static shapes
self.assertAllEqual(nmsed_boxes.shape.as_list(),
exp_nms_corners.shape)
......@@ -616,11 +620,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[[0, 0], [0, 0]]]])
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
masks=masks)
nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
masks=masks)
self.assertIsNone(nmsed_additional_fields)
# Check static shapes
self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
......@@ -639,6 +645,91 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self.assertAllClose(num_detections, [2, 3])
self.assertAllClose(nmsed_masks, exp_nms_masks)
def test_batch_multiclass_nms_with_additional_fields(self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]]],
[[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
tf.float32)
scores = tf.constant([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0]],
[[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]]])
additional_fields = {
'keypoints': tf.constant(
[[[[6, 7], [8, 9]],
[[0, 1], [2, 3]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[13, 14], [15, 16]],
[[8, 9], [10, 11]],
[[10, 11], [12, 13]],
[[0, 0], [0, 0]]]],
tf.float32)
}
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = np.array([[[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 999, 2, 1004],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101],
[0, 0, 0, 0]]])
exp_nms_scores = np.array([[.95, .9, 0, 0],
[.85, .5, .3, 0]])
exp_nms_classes = np.array([[0, 0, 0, 0],
[1, 0, 0, 0]])
exp_nms_additional_fields = {
'keypoints': np.array([[[[0, 0], [0, 0]],
[[6, 7], [8, 9]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[10, 11], [12, 13]],
[[13, 14], [15, 16]],
[[8, 9], [10, 11]],
[[0, 0], [0, 0]]]])
}
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
additional_fields=additional_fields)
self.assertIsNone(nmsed_masks)
# Check static shapes
self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
self.assertEqual(len(nmsed_additional_fields),
len(exp_nms_additional_fields))
for key in exp_nms_additional_fields:
self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(),
exp_nms_additional_fields[key].shape)
self.assertEqual(num_detections.shape.as_list(), [2])
with self.test_session() as sess:
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
nmsed_additional_fields, num_detections])
self.assertAllClose(nmsed_boxes, exp_nms_corners)
self.assertAllClose(nmsed_scores, exp_nms_scores)
self.assertAllClose(nmsed_classes, exp_nms_classes)
for key in exp_nms_additional_fields:
self.assertAllClose(nmsed_additional_fields[key],
exp_nms_additional_fields[key])
self.assertAllClose(num_detections, [2, 3])
def test_batch_multiclass_nms_with_dynamic_batch_size(self):
boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4))
scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2))
......@@ -690,11 +781,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[[0, 0], [0, 0]]]])
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression(
boxes_placeholder, scores_placeholder, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
masks=masks_placeholder)
nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes_placeholder, scores_placeholder, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
masks=masks_placeholder)
self.assertIsNone(nmsed_additional_fields)
# Check static shapes
self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4])
self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4])
......@@ -765,10 +858,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[[0, 0], [0, 0]]]]
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
num_valid_boxes=num_valid_boxes, masks=masks)
nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
num_valid_boxes=num_valid_boxes, masks=masks)
self.assertIsNone(nmsed_additional_fields)
with self.test_session() as sess:
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
......@@ -780,6 +876,84 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self.assertAllClose(num_detections, [1, 1])
self.assertAllClose(nmsed_masks, exp_nms_masks)
def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes(
self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]]],
[[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
tf.float32)
scores = tf.constant([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0]],
[[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]]])
additional_fields = {
'keypoints': tf.constant(
[[[[6, 7], [8, 9]],
[[0, 1], [2, 3]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[13, 14], [15, 16]],
[[8, 9], [10, 11]],
[[10, 11], [12, 13]],
[[0, 0], [0, 0]]]],
tf.float32)
}
num_valid_boxes = tf.constant([1, 1], tf.int32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[[0, 0, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 10.1, 1, 11.1],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]]
exp_nms_scores = [[.9, 0, 0, 0],
[.5, 0, 0, 0]]
exp_nms_classes = [[0, 0, 0, 0],
[0, 0, 0, 0]]
exp_nms_additional_fields = {
'keypoints': np.array([[[[6, 7], [8, 9]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[13, 14], [15, 16]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]]])
}
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
num_valid_boxes=num_valid_boxes,
additional_fields=additional_fields)
self.assertIsNone(nmsed_masks)
with self.test_session() as sess:
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
nmsed_additional_fields, num_detections])
self.assertAllClose(nmsed_boxes, exp_nms_corners)
self.assertAllClose(nmsed_scores, exp_nms_scores)
self.assertAllClose(nmsed_classes, exp_nms_classes)
for key in exp_nms_additional_fields:
self.assertAllClose(nmsed_additional_fields[key],
exp_nms_additional_fields[key])
self.assertAllClose(num_detections, [1, 1])
if __name__ == '__main__':
tf.test.main()
......@@ -18,6 +18,7 @@ py_library(
"//tensorflow_models/object_detection/core:model",
"//tensorflow_models/object_detection/core:target_assigner",
"//tensorflow_models/object_detection/utils:shape_utils",
"//tensorflow_models/object_detection/utils:visualization_utils",
],
)
......
......@@ -15,6 +15,7 @@
"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
import numpy as np
import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib
......@@ -46,19 +47,19 @@ class FasterRCNNMetaArchTest(
mask_height = 2
mask_width = 2
mask_predictions = .6 * tf.ones(
mask_predictions = 30. * tf.ones(
[total_num_padded_proposals, model.num_classes,
mask_height, mask_width], dtype=tf.float32)
exp_detection_masks = [[[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]]],
[[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[0, 0], [0, 0]]]]
exp_detection_masks = np.array([[[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]]],
[[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[1, 1], [1, 1]],
[[0, 0], [0, 0]]]])
detections = model.postprocess({
'refined_box_encodings': refined_box_encodings,
......@@ -79,6 +80,17 @@ class FasterRCNNMetaArchTest(
self.assertAllClose(detections_out['detection_masks'],
exp_detection_masks)
def _get_box_classifier_features_shape(self,
image_size,
batch_size,
max_num_proposals,
initial_crop_size,
maxpool_stride,
num_features):
return (batch_size * max_num_proposals,
initial_crop_size/maxpool_stride,
initial_crop_size/maxpool_stride,
num_features)
if __name__ == '__main__':
tf.test.main()
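A quick worked example of the shape computed by the new `_get_box_classifier_features_shape` helper above (the concrete numbers here are illustrative and are not taken from the test):

```python
# Proposals from every image are flattened into the leading dimension, and the
# ROI crop is shrunk by the max-pool stride.
batch_size = 2
max_num_proposals = 8
initial_crop_size = 14
maxpool_stride = 2
num_features = 512

expected_shape = (batch_size * max_num_proposals,      # 2 * 8  = 16
                  initial_crop_size / maxpool_stride,  # 14 / 2 = 7
                  initial_crop_size / maxpool_stride,  # 14 / 2 = 7
                  num_features)                        # 512
# i.e. box_classifier_features is expected to be [16, 7, 7, 512] for
# Faster R-CNN, whereas the R-FCN override further below returns the full
# image feature map shape instead.
```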
......@@ -73,6 +73,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
hard_example_miner,
parallel_iterations=16):
"""RFCNMetaArch Constructor.
......@@ -149,6 +150,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
used to convert logits to probabilities.
second_stage_localization_loss_weight: A float
second_stage_classification_loss_weight: A float
second_stage_classification_loss: A string indicating which loss function
to use, supports 'softmax' and 'sigmoid'.
hard_example_miner: A losses.HardExampleMiner object (can be None).
parallel_iterations: (Optional) The number of iterations allowed to run
in parallel for calls to tf.map_fn.
......@@ -185,6 +188,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
second_stage_score_conversion_fn,
second_stage_localization_loss_weight,
second_stage_classification_loss_weight,
second_stage_classification_loss,
1.0, # second stage mask prediction loss weight isn't used in R-FCN.
hard_example_miner,
parallel_iterations)
......@@ -198,10 +203,10 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
"""Predicts the output tensors from 2nd stage of FasterRCNN.
Args:
rpn_box_encodings: 3-D float tensor of shape
rpn_box_encodings: 4-D float tensor of shape
[batch_size, num_valid_anchors, self._box_coder.code_size] containing
predicted boxes.
rpn_objectness_predictions_with_background: 3-D float tensor of shape
rpn_objectness_predictions_with_background: 2-D float tensor of shape
[batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors. Note that this
tensor *includes* background class predictions (at class index 0).
......@@ -225,13 +230,22 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
Note that this tensor *includes* background class predictions
(at class index 0).
3) num_proposals: An int32 tensor of shape [batch_size] representing the
number of proposals generated by the RPN. `num_proposals` allows us
number of proposals generated by the RPN. `num_proposals` allows us
to keep track of which entries are to be treated as zero paddings and
which are not since we always pad the number of proposals to be
`self.max_num_proposals` for each image.
4) proposal_boxes: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing
decoded proposal bounding boxes (in absolute coordinates).
5) proposal_boxes_normalized: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing decoded proposal
bounding boxes (in normalized coordinates). Can be used to override
the boxes proposed by the RPN, thus enabling one to extract box
classification and prediction for externally selected areas of the
image.
6) box_classifier_features: a 4-D float32 tensor, of shape
[batch_size, feature_map_height, feature_map_width, depth],
representing the box classifier features.
"""
proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
rpn_box_encodings, rpn_objectness_predictions_with_background,
......@@ -263,5 +277,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
class_predictions_with_background,
'num_proposals': num_proposals,
'proposal_boxes': absolute_proposal_boxes,
'box_classifier_features': box_classifier_features,
'proposal_boxes_normalized': proposal_boxes_normalized,
}
return prediction_dict
......@@ -51,6 +51,15 @@ class RFCNMetaArchTest(
return rfcn_meta_arch.RFCNMetaArch(
second_stage_rfcn_box_predictor=box_predictor, **common_kwargs)
def _get_box_classifier_features_shape(self,
image_size,
batch_size,
max_num_proposals,
initial_crop_size,
maxpool_stride,
num_features):
return (batch_size, image_size, image_size, num_features)
if __name__ == '__main__':
tf.test.main()
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD Meta-architecture definition.
General tensorflow implementation of convolutional Multibox/SSD detection
......@@ -29,6 +28,7 @@ from object_detection.core import model
from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner
from object_detection.utils import shape_utils
from object_detection.utils import visualization_utils
slim = tf.contrib.slim
......@@ -37,13 +37,34 @@ class SSDFeatureExtractor(object):
"""SSD Feature Extractor definition."""
def __init__(self,
is_training,
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None):
"""Constructor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: whether to reuse variables. Default is None.
"""
self._is_training = is_training
self._depth_multiplier = depth_multiplier
self._min_depth = min_depth
self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams = conv_hyperparams
self._batch_norm_trainable = batch_norm_trainable
self._reuse_weights = reuse_weights
@abstractmethod
......@@ -101,9 +122,9 @@ class SSDMetaArch(model.DetectionModel):
add_summaries=True):
"""SSDMetaArch Constructor.
TODO: group NMS parameters + score converter into
a class and loss parameters into a class and write config protos for
postprocessing and losses.
TODO: group NMS parameters + score converter into a class and loss
parameters into a class and write config protos for postprocessing
and losses.
Args:
is_training: A boolean indicating whether the training version of the
......@@ -204,8 +225,8 @@ class SSDMetaArch(model.DetectionModel):
if inputs.dtype is not tf.float32:
raise ValueError('`preprocess` expects a tf.float32 tensor')
with tf.name_scope('Preprocessor'):
# TODO: revisit whether to always use batch size as the number of
# parallel iterations vs allow for dynamic batching.
# TODO: revisit whether to always use batch size as the number of parallel
# iterations vs allow for dynamic batching.
resized_inputs = tf.map_fn(self._image_resizer_fn,
elems=inputs,
dtype=tf.float32)
......@@ -226,7 +247,7 @@ class SSDMetaArch(model.DetectionModel):
Returns:
prediction_dict: a dictionary holding "raw" prediction tensors:
1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
1) box_encodings: 4-D float tensor of shape [batch_size, num_anchors,
box_code_dimension] containing predicted boxes.
2) class_predictions_with_background: 3-D float tensor of shape
[batch_size, num_anchors, num_classes+1] containing class predictions
......@@ -234,19 +255,26 @@ class SSDMetaArch(model.DetectionModel):
background class predictions (at class index 0).
3) feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i].
4) anchors: 2-D float tensor of shape [num_anchors, 4] containing
the generated anchors in normalized coordinates.
"""
with tf.variable_scope(None, self._extract_features_scope,
[preprocessed_inputs]):
feature_maps = self._feature_extractor.extract_features(
preprocessed_inputs)
feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
self._anchors = self._anchor_generator.generate(feature_map_spatial_dims)
image_shape = tf.shape(preprocessed_inputs)
self._anchors = self._anchor_generator.generate(
feature_map_spatial_dims,
im_height=image_shape[1],
im_width=image_shape[2])
(box_encodings, class_predictions_with_background
) = self._add_box_predictions_to_feature_maps(feature_maps)
predictions_dict = {
'box_encodings': box_encodings,
'class_predictions_with_background': class_predictions_with_background,
'feature_maps': feature_maps
'feature_maps': feature_maps,
'anchors': self._anchors.get()
}
return predictions_dict
......@@ -351,9 +379,11 @@ class SSDMetaArch(model.DetectionModel):
Returns:
detections: a dictionary containing the following fields
detection_boxes: [batch, max_detection, 4]
detection_boxes: [batch, max_detections, 4]
detection_scores: [batch, max_detections]
detection_classes: [batch, max_detections]
detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
encoded in the prediction_dict 'box_encodings')
num_detections: [batch]
Raises:
ValueError: if prediction_dict does not contain `box_encodings` or
......@@ -365,7 +395,7 @@ class SSDMetaArch(model.DetectionModel):
with tf.name_scope('Postprocessor'):
box_encodings = prediction_dict['box_encodings']
class_predictions = prediction_dict['class_predictions_with_background']
detection_boxes = self._batch_decode(box_encodings)
detection_boxes, detection_keypoints = self._batch_decode(box_encodings)
detection_boxes = tf.expand_dims(detection_boxes, axis=2)
class_predictions_without_background = tf.slice(class_predictions,
......@@ -374,14 +404,25 @@ class SSDMetaArch(model.DetectionModel):
detection_scores = self._score_conversion_fn(
class_predictions_without_background)
clip_window = tf.constant([0, 0, 1, 1], tf.float32)
(nmsed_boxes, nmsed_scores, nmsed_classes, _,
num_detections) = self._non_max_suppression_fn(detection_boxes,
detection_scores,
clip_window=clip_window)
return {'detection_boxes': nmsed_boxes,
'detection_scores': nmsed_scores,
'detection_classes': nmsed_classes,
'num_detections': tf.to_float(num_detections)}
additional_fields = None
if detection_keypoints is not None:
additional_fields = {
fields.BoxListFields.keypoints: detection_keypoints}
(nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields,
num_detections) = self._non_max_suppression_fn(
detection_boxes,
detection_scores,
clip_window=clip_window,
additional_fields=additional_fields)
detection_dict = {'detection_boxes': nmsed_boxes,
'detection_scores': nmsed_scores,
'detection_classes': nmsed_classes,
'num_detections': tf.to_float(num_detections)}
if (nmsed_additional_fields is not None and
fields.BoxListFields.keypoints in nmsed_additional_fields):
detection_dict['detection_keypoints'] = nmsed_additional_fields[
fields.BoxListFields.keypoints]
return detection_dict
def loss(self, prediction_dict, scope=None):
"""Compute scalar loss tensors with respect to provided groundtruth.
......@@ -395,7 +436,7 @@ class SSDMetaArch(model.DetectionModel):
box_code_dimension] containing predicted boxes.
2) class_predictions_with_background: 3-D float tensor of shape
[batch_size, num_anchors, num_classes+1] containing class predictions
(logits) for each of the anchors. Note that this tensor *includes*
(logits) for each of the anchors. Note that this tensor *includes*
background class predictions.
scope: Optional scope name.
......@@ -405,10 +446,14 @@ class SSDMetaArch(model.DetectionModel):
values.
"""
with tf.name_scope(scope, 'Loss', prediction_dict.values()):
keypoints = None
if self.groundtruth_has_field(fields.BoxListFields.keypoints):
keypoints = self.groundtruth_lists(fields.BoxListFields.keypoints)
(batch_cls_targets, batch_cls_weights, batch_reg_targets,
batch_reg_weights, match_list) = self._assign_targets(
self.groundtruth_lists(fields.BoxListFields.boxes),
self.groundtruth_lists(fields.BoxListFields.classes))
self.groundtruth_lists(fields.BoxListFields.classes),
keypoints)
if self._add_summaries:
self._summarize_input(
self.groundtruth_lists(fields.BoxListFields.boxes), match_list)
......@@ -417,35 +462,60 @@ class SSDMetaArch(model.DetectionModel):
location_losses = self._localization_loss(
prediction_dict['box_encodings'],
batch_reg_targets,
ignore_nan_targets=True,
weights=batch_reg_weights)
cls_losses = self._classification_loss(
prediction_dict['class_predictions_with_background'],
batch_cls_targets,
weights=batch_cls_weights)
# Optionally apply hard mining on top of loss values
localization_loss = tf.reduce_sum(location_losses)
classification_loss = tf.reduce_sum(cls_losses)
if self._hard_example_miner:
(localization_loss, classification_loss) = self._apply_hard_mining(
location_losses, cls_losses, prediction_dict, match_list)
if self._add_summaries:
self._hard_example_miner.summarize()
else:
if self._add_summaries:
class_ids = tf.argmax(batch_cls_targets, axis=2)
flattened_class_ids = tf.reshape(class_ids, [-1])
flattened_classification_losses = tf.reshape(cls_losses, [-1])
self._summarize_anchor_classification_loss(
flattened_class_ids, flattened_classification_losses)
localization_loss = tf.reduce_sum(location_losses)
classification_loss = tf.reduce_sum(cls_losses)
# Optionally normalize by number of positive matches
normalizer = tf.constant(1.0, dtype=tf.float32)
if self._normalize_loss_by_num_matches:
normalizer = tf.maximum(tf.to_float(tf.reduce_sum(num_matches)), 1.0)
with tf.name_scope('localization_loss'):
localization_loss = ((self._localization_loss_weight / normalizer) *
localization_loss)
with tf.name_scope('classification_loss'):
classification_loss = ((self._classification_loss_weight / normalizer) *
classification_loss)
loss_dict = {
'localization_loss': (self._localization_loss_weight / normalizer) *
localization_loss,
'classification_loss': (self._classification_loss_weight /
normalizer) * classification_loss
'localization_loss': localization_loss,
'classification_loss': classification_loss
}
return loss_dict
def _assign_targets(self, groundtruth_boxes_list, groundtruth_classes_list):
def _summarize_anchor_classification_loss(self, class_ids, cls_losses):
positive_indices = tf.where(tf.greater(class_ids, 0))
positive_anchor_cls_loss = tf.squeeze(
tf.gather(cls_losses, positive_indices), axis=1)
visualization_utils.add_cdf_image_summary(positive_anchor_cls_loss,
'PositiveAnchorLossCDF')
negative_indices = tf.where(tf.equal(class_ids, 0))
negative_anchor_cls_loss = tf.squeeze(
tf.gather(cls_losses, negative_indices), axis=1)
visualization_utils.add_cdf_image_summary(negative_anchor_cls_loss,
'NegativeAnchorLossCDF')
def _assign_targets(self, groundtruth_boxes_list, groundtruth_classes_list,
groundtruth_keypoints_list=None):
"""Assign groundtruth targets.
Adds a background class to each one-hot encoding of groundtruth classes
......@@ -460,6 +530,8 @@ class SSDMetaArch(model.DetectionModel):
groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of
shape [num_boxes, num_classes] containing the class targets with the 0th
index assumed to map to the first non-background class.
groundtruth_keypoints_list: (optional) a list of 3-D tensors of shape
[num_boxes, num_keypoints, 2]
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
......@@ -480,6 +552,10 @@ class SSDMetaArch(model.DetectionModel):
tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT')
for one_hot_encoding in groundtruth_classes_list
]
if groundtruth_keypoints_list is not None:
for boxlist, keypoints in zip(
groundtruth_boxlists, groundtruth_keypoints_list):
boxlist.add_field(fields.BoxListFields.keypoints, keypoints)
return target_assigner.batch_assign_targets(
self._target_assigner, self.anchors, groundtruth_boxlists,
groundtruth_classes_with_background_list)
......@@ -544,12 +620,11 @@ class SSDMetaArch(model.DetectionModel):
mined_cls_loss: a float scalar with sum of classification losses from
selected hard examples.
"""
class_pred_shape = [-1, self.anchors.num_boxes_static(), self.num_classes]
class_predictions = tf.reshape(
tf.slice(prediction_dict['class_predictions_with_background'],
[0, 0, 1], class_pred_shape), class_pred_shape)
class_predictions = tf.slice(
prediction_dict['class_predictions_with_background'], [0, 0,
1], [-1, -1, -1])
decoded_boxes = self._batch_decode(prediction_dict['box_encodings'])
decoded_boxes, _ = self._batch_decode(prediction_dict['box_encodings'])
decoded_box_tensors_list = tf.unstack(decoded_boxes)
class_prediction_list = tf.unstack(class_predictions)
decoded_boxlist_list = []
......@@ -574,6 +649,9 @@ class SSDMetaArch(model.DetectionModel):
Returns:
decoded_boxes: A float32 tensor of shape
[batch_size, num_anchors, 4] containing the decoded boxes.
decoded_keypoints: A float32 tensor of shape
[batch_size, num_anchors, num_keypoints, 2] containing the decoded
keypoints if present in the input `box_encodings`, None otherwise.
"""
combined_shape = shape_utils.combined_static_and_dynamic_shape(
box_encodings)
......@@ -581,13 +659,21 @@ class SSDMetaArch(model.DetectionModel):
tiled_anchor_boxes = tf.tile(
tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1])
tiled_anchors_boxlist = box_list.BoxList(
tf.reshape(tiled_anchor_boxes, [-1, self._box_coder.code_size]))
tf.reshape(tiled_anchor_boxes, [-1, 4]))
decoded_boxes = self._box_coder.decode(
tf.reshape(box_encodings, [-1, self._box_coder.code_size]),
tiled_anchors_boxlist)
return tf.reshape(decoded_boxes.get(),
tf.stack([combined_shape[0], combined_shape[1],
4]))
decoded_keypoints = None
if decoded_boxes.has_field(fields.BoxListFields.keypoints):
decoded_keypoints = decoded_boxes.get_field(
fields.BoxListFields.keypoints)
num_keypoints = decoded_keypoints.get_shape()[1]
decoded_keypoints = tf.reshape(
decoded_keypoints,
tf.stack([combined_shape[0], combined_shape[1], num_keypoints, 2]))
decoded_boxes = tf.reshape(decoded_boxes.get(), tf.stack(
[combined_shape[0], combined_shape[1], 4]))
return decoded_boxes, decoded_keypoints
def restore_map(self, from_detection_checkpoint=True):
"""Returns a map of variables to load from a foreign checkpoint.
......
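A small sketch (shapes are illustrative; assuming TensorFlow 1.x) of the final reshape that `_batch_decode` now performs on decoded keypoints: the box coder operates on a flattened `[batch_size * num_anchors, ...]` BoxList, so the keypoint field is folded back into a per-image tensor before being returned alongside the boxes:

```python
import tensorflow as tf

batch_size, num_anchors, num_keypoints = 2, 6, 5
# What the decoded BoxList's keypoints field would look like after decoding.
flat_decoded_keypoints = tf.zeros(
    [batch_size * num_anchors, num_keypoints, 2])
decoded_keypoints = tf.reshape(
    flat_decoded_keypoints,
    tf.stack([batch_size, num_anchors, num_keypoints, 2]))
# decoded_keypoints: [batch_size, num_anchors, num_keypoints, 2], which
# postprocess() then threads through NMS via the `additional_fields` argument
# and exposes as `detection_keypoints`.
```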
......@@ -18,7 +18,6 @@ import functools
import numpy as np
import tensorflow as tf
from tensorflow.python.training import saver as tf_saver
from object_detection.core import anchor_generator
from object_detection.core import box_list
from object_detection.core import losses
......@@ -34,7 +33,12 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
def __init__(self):
super(FakeSSDFeatureExtractor, self).__init__(
depth_multiplier=0, min_depth=0, conv_hyperparams=None)
is_training=True,
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
batch_norm_trainable=True,
conv_hyperparams=None)
def preprocess(self, resized_inputs):
return tf.identity(resized_inputs)
......@@ -55,7 +59,7 @@ class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
def num_anchors_per_location(self):
return [1]
def _generate(self, feature_map_shape_list):
def _generate(self, feature_map_shape_list, im_height, im_width):
return box_list.BoxList(
tf.constant([[0, 0, .5, .5],
[0, .5, .5, 1],
......@@ -147,6 +151,7 @@ class SsdMetaArchTest(tf.test.TestCase):
self.assertTrue('box_encodings' in prediction_dict)
self.assertTrue('class_predictions_with_background' in prediction_dict)
self.assertTrue('feature_maps' in prediction_dict)
self.assertTrue('anchors' in prediction_dict)
init_op = tf.global_variables_initializer()
with self.test_session(graph=tf_graph) as sess:
......@@ -242,7 +247,7 @@ class SsdMetaArchTest(tf.test.TestCase):
def test_restore_map_for_detection_ckpt(self):
init_op = tf.global_variables_initializer()
saver = tf_saver.Saver()
saver = tf.train.Saver()
save_path = self.get_temp_dir()
with self.test_session() as sess:
sess.run(init_op)
......