Merged commit includes the following changes: (#8803)

320117767 by ronnyvotel: DensePose postprocessing implementation. -- 320065853 by ronnyvotel: Updating how masks are reframed, so that it works on float and uint8 masks. -- 320061717 by yuhuic: Updated CenterNet restore_from_objects to allow the model to load the checkpoints saved during training. -- 319835172 by ronnyvotel: Updating how the DensePose UV Symmetries MAT file path is constructed and loaded. -- 319834678 by ronnyvotel: First update to CenterNetMetaArch for DensePose. Adding prediction and loss functionality. -- 319810261 by rathodv: Create a setup.py file to simplify installation. Usage: "python object_detection/packages/tf1/setup.py install" for TF1. "python object_detection/packages/tf2/setup.py install" for TF2. or to create source distribution "python object_detection/packages/tf1/setup.py sdist" for TF1. "python object_detection/packages/tf2/setup.py sdist" for TF2. -- 319803041 by sbeery: Updating documentation for export -- 319688087 by rathodv: Update as_matrix() to to_numpy() to avoid failures with python3.6 -- 319686183 by vighneshb: Require tpu_name when use_tpu is set. -- 319613327 by aom: EfficientDet-style Data Augmentation. -- 319572180 by rathodv: Add TF2 SSD FPN (a.k.a RetinaNet) configs. -- 319553823 by rathodv: Internal Change. -- PiperOrigin-RevId: 320117767 Co-authored-by: TF Object Detection Team <no-reply@google.com>

Merged commit includes the following changes: (#8803)
320117767 by ronnyvotel: DensePose postprocessing implementation. -- 320065853 by ronnyvotel: Updating how masks are reframed, so that it works on float and uint8 masks. -- 320061717 by yuhuic: Updated CenterNet restore_from_objects to allow the model to load the checkpoints saved during training. -- 319835172 by ronnyvotel: Updating how the DensePose UV Symmetries MAT file path is constructed and loaded. -- 319834678 by ronnyvotel: First update to CenterNetMetaArch for DensePose. Adding prediction and loss functionality. -- 319810261 by rathodv: Create a setup.py file to simplify installation. Usage: "python object_detection/packages/tf1/setup.py install" for TF1. "python object_detection/packages/tf2/setup.py install" for TF2. or to create source distribution "python object_detection/packages/tf1/setup.py sdist" for TF1. "python object_detection/packages/tf2/setup.py sdist" for TF2. -- 319803041 by sbeery: Updating documentation for export -- 319688087 by rathodv: Update as_matrix() to to_numpy() to avoid failures with python3.6 -- 319686183 by vighneshb: Require tpu_name when use_tpu is set. -- 319613327 by aom: EfficientDet-style Data Augmentation. -- 319572180 by rathodv: Add TF2 SSD FPN (a.k.a RetinaNet) configs. -- 319553823 by rathodv: Internal Change. -- PiperOrigin-RevId: 320117767 Co-authored-by: TF Object Detection Team <no-reply@google.com>
52bb4ab1 · vivek rathod · GitHub · 3b56ba8d · 52bb4ab1 · 52bb4ab1
Unverified Commit 52bb4ab1 authored Jul 08, 2020 by vivek rathod Committed by GitHub Jul 08, 2020
20 changed files
--- a/research/object_detection/builders/box_predictor_builder_test.py
+++ b/research/object_detection/builders/box_predictor_builder_test.py
@@ -17,9 +17,8 @@
 """Tests for box_predictor_builder."""
 import unittest
-import mock
+from unittest import mock  # pylint: disable=g-importing-member
 import tensorflow.compat.v1 as tf
 from google.protobuf import text_format
 from object_detection.builders import box_predictor_builder
 from object_detection.builders import hyperparams_builder

--- a/research/object_detection/builders/graph_rewriter_builder_tf1_test.py
+++ b/research/object_detection/builders/graph_rewriter_builder_tf1_test.py
@@ -14,7 +14,7 @@
 # ==============================================================================
 """Tests for graph_rewriter_builder."""
 import unittest
-import mock
+from unittest import mock  # pylint: disable=g-importing-member
 import tensorflow.compat.v1 as tf
 import tf_slim as slim

--- a/research/object_detection/builders/preprocessor_builder.py
+++ b/research/object_detection/builders/preprocessor_builder.py
@@ -417,4 +417,12 @@ def build(preprocessor_step_config):
        'num_scales': config.num_scales
    }
+  if step_type == 'random_scale_crop_and_pad_to_square':
+    config = preprocessor_step_config.random_scale_crop_and_pad_to_square
+    return preprocessor.random_scale_crop_and_pad_to_square, {
+        'scale_min': config.scale_min,
+        'scale_max': config.scale_max,
+        'output_size': config.output_size,
+    }
  raise ValueError('Unknown preprocessing step.')
--- a/research/object_detection/core/densepose_ops.py
+++ b/research/object_detection/core/densepose_ops.py
@@ -42,9 +42,6 @@ PART_NAMES = [
    b'left_face',
 ]
-_SRC_PATH = ('google3/third_party/tensorflow_models/object_detection/'
-             'dataset_tools/densepose')
 def scale(dp_surface_coords, y_scale, x_scale, scope=None):
  """Scales DensePose coordinates in y and x dimensions.
@@ -266,10 +263,14 @@ class DensePoseHorizontalFlip(object):
  def __init__(self):
    """Constructor."""
-    uv_symmetry_transforms_path = os.path.join(
+    path = os.path.dirname(os.path.abspath(__file__))
-        tf.resource_loader.get_data_files_path(), '..', 'dataset_tools',
+    uv_symmetry_transforms_path = tf.resource_loader.get_path_to_datafile(
-        'densepose', 'UV_symmetry_transforms.mat')
+        os.path.join(path, '..', 'dataset_tools', 'densepose',
-    data = scipy.io.loadmat(uv_symmetry_transforms_path)
+                     'UV_symmetry_transforms.mat'))
+    tf.logging.info('Loading DensePose symmetry transforms file from {}'.format(
+        uv_symmetry_transforms_path))
+    with tf.io.gfile.GFile(uv_symmetry_transforms_path, 'rb') as f:
+      data = scipy.io.loadmat(f)
    # Create lookup maps which indicate how a VU coordinate changes after a
    # horizontal flip.

--- a/research/object_detection/core/model.py
+++ b/research/object_detection/core/model.py
@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    Args:
      field: a string key, options are
        fields.BoxListFields.{boxes,classes,masks,keypoints,
-        keypoint_visibilities} or
+        keypoint_visibilities, densepose_*} or
        fields.InputDataFields.is_annotated.
    Returns:
@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    Args:
      field: a string key, options are
        fields.BoxListFields.{boxes,classes,masks,keypoints,
-        keypoint_visibilities} or
+        keypoint_visibilities, densepose_*} or
        fields.InputDataFields.is_annotated.
    Returns:
@@ -288,19 +288,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
    """
    pass
-  def provide_groundtruth(self,
+  def provide_groundtruth(
-                          groundtruth_boxes_list,
+      self,
-                          groundtruth_classes_list,
+      groundtruth_boxes_list,
-                          groundtruth_masks_list=None,
+      groundtruth_classes_list,
-                          groundtruth_keypoints_list=None,
+      groundtruth_masks_list=None,
-                          groundtruth_keypoint_visibilities_list=None,
+      groundtruth_keypoints_list=None,
-                          groundtruth_weights_list=None,
+      groundtruth_keypoint_visibilities_list=None,
-                          groundtruth_confidences_list=None,
+      groundtruth_dp_num_points_list=None,
-                          groundtruth_is_crowd_list=None,
+      groundtruth_dp_part_ids_list=None,
-                          groundtruth_group_of_list=None,
+      groundtruth_dp_surface_coords_list=None,
-                          groundtruth_area_list=None,
+      groundtruth_weights_list=None,
-                          is_annotated_list=None,
+      groundtruth_confidences_list=None,
-                          groundtruth_labeled_classes=None):
+      groundtruth_is_crowd_list=None,
+      groundtruth_group_of_list=None,
+      groundtruth_area_list=None,
+      is_annotated_list=None,
+      groundtruth_labeled_classes=None):
    """Provide groundtruth tensors.
    Args:
@@ -324,6 +328,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
        `groundtruth_keypoint_visibilities_list`).
      groundtruth_keypoint_visibilities_list: a list of 2-D tf.bool tensors
        of shape [num_boxes, num_keypoints] containing keypoint visibilities.
+      groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
+        [num_boxes] containing the number of DensePose sampled points.
+      groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
+        [num_boxes, max_sampled_points] containing the DensePose part ids
+        (0-indexed) for each sampled point. Note that there may be padding.
+      groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
+        shape [num_boxes, max_sampled_points, 4] containing the DensePose
+        surface coordinates for each sampled point. Note that there may be
+        padding.
      groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
        [num_boxes] containing weights for groundtruth boxes.
      groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
@@ -361,6 +374,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
      self._groundtruth_lists[
          fields.BoxListFields.keypoint_visibilities] = (
              groundtruth_keypoint_visibilities_list)
+    if groundtruth_dp_num_points_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_num_points] = (
+              groundtruth_dp_num_points_list)
+    if groundtruth_dp_part_ids_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_part_ids] = (
+              groundtruth_dp_part_ids_list)
+    if groundtruth_dp_surface_coords_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_surface_coords] = (
+              groundtruth_dp_surface_coords_list)
    if groundtruth_is_crowd_list:
      self._groundtruth_lists[
          fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list

--- a/research/object_detection/core/preprocessor.py
+++ b/research/object_detection/core/preprocessor.py
@@ -3984,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
  Args:
    image: rank 3 float32 tensor containing 1 image ->
-           [height, width,channels].
+           [height, width, channels].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
@@ -4128,6 +4128,131 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
  return return_values
+def random_scale_crop_and_pad_to_square(
+    image,
+    boxes,
+    labels,
+    label_weights,
+    masks=None,
+    keypoints=None,
+    scale_min=0.1,
+    scale_max=2.0,
+    output_size=512,
+    resize_method=tf.image.ResizeMethod.BILINEAR,
+    seed=None):
+  """Randomly scale, crop, and then pad an image to fixed square dimensions.
+   Randomly scale, crop, and then pad an image to the desired square output
+   dimensions. Specifically, this method first samples a random_scale factor
+   from a uniform distribution between scale_min and scale_max, and then resizes
+   the image such that its maximum dimension is (output_size * random_scale).
+   Secondly, a square output_size crop is extracted from the resized image
+   (note, this will only occur when random_scale > 1.0). Lastly, the cropped
+   region is padded to the desired square output_size, by filling with zeros.
+   The augmentation is borrowed from [1]
+   [1]: https://arxiv.org/abs/1911.09070
+  Args:
+    image: rank 3 float32 tensor containing 1 image ->
+      [height, width, channels].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
+      are in normalized form meaning their coordinates vary between [0, 1]. Each
+      row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop boundary
+      are clipped to the boundary and boxes falling outside the crop are
+      ignored.
+    labels: rank 1 int32 tensor containing the object classes.
+    label_weights: float32 tensor of shape [num_instances] representing the
+      weight for each box.
+    masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
+      width] containing instance masks. The masks are of the same height, width
+      as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
+      num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
+    scale_min: float, the minimum value for the random scale factor.
+    scale_max: float, the maximum value for the random scale factor.
+    output_size: int, the desired (square) output image size.
+    resize_method: tf.image.ResizeMethod, resize method to use when scaling the
+      input images.
+    seed: random seed.
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+           Boxes are in normalized form.
+    labels: new labels.
+    label_weights: rank 1 float32 tensor with shape [num_instances].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+           containing instance masks.
+  """
+  img_shape = tf.shape(image)
+  input_height, input_width = img_shape[0], img_shape[1]
+  random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)
+  # Compute the scaled height and width from the random scale.
+  max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
+  input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
+  input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
+  scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
+  scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)
+  # Compute the offsets:
+  offset_y = tf.cast(scaled_height - output_size, tf.float32)
+  offset_x = tf.cast(scaled_width - output_size, tf.float32)
+  offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
+  offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
+  offset_y = tf.cast(offset_y, tf.int32)
+  offset_x = tf.cast(offset_x, tf.int32)
+  # Scale, crop, and pad the input image.
+  scaled_image = tf.image.resize_images(
+      image, [scaled_height, scaled_width], method=resize_method)
+  scaled_image = scaled_image[offset_y:offset_y + output_size,
+                              offset_x:offset_x + output_size, :]
+  output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0, output_size,
+                                              output_size)
+  # Update the boxes.
+  new_window = tf.cast(
+      tf.stack([offset_y, offset_x,
+                offset_y + output_size, offset_x + output_size]),
+      dtype=tf.float32)
+  new_window /= tf.cast(
+      tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
+      dtype=tf.float32)
+  boxlist = box_list.BoxList(boxes)
+  boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
+  boxlist, indices = box_list_ops.prune_completely_outside_window(
+      boxlist, [0.0, 0.0, 1.0, 1.0])
+  boxlist = box_list_ops.clip_to_window(
+      boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)
+  return_values = [output_image, boxlist.get(),
+                   tf.gather(labels, indices),
+                   tf.gather(label_weights, indices)]
+  if masks is not None:
+    new_masks = tf.expand_dims(masks, -1)
+    new_masks = tf.image.resize_images(
+        new_masks, [scaled_height, scaled_width], method=resize_method)
+    new_masks = new_masks[:, offset_y:offset_y + output_size,
+                          offset_x:offset_x + output_size, :]
+    new_masks = tf.image.pad_to_bounding_box(
+        new_masks, 0, 0, output_size, output_size)
+    new_masks = tf.squeeze(new_masks, [-1])
+    return_values.append(tf.gather(new_masks, indices))
+  if keypoints is not None:
+    keypoints = tf.gather(keypoints, indices)
+    keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
+    keypoints = keypoint_ops.prune_outside_window(
+        keypoints, [0.0, 0.0, 1.0, 1.0])
+    return_values.append(keypoints)
+  return return_values
 def get_default_func_arg_map(include_label_weights=True,
                             include_label_confidences=False,
                             include_multiclass_scores=False,
@@ -4230,15 +4355,14 @@ def get_default_func_arg_map(include_label_weights=True,
      random_adjust_saturation: (fields.InputDataFields.image,),
      random_distort_color: (fields.InputDataFields.image,),
      random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
-      random_crop_image: (fields.InputDataFields.image,
+      random_crop_image:
-                          fields.InputDataFields.groundtruth_boxes,
+          (fields.InputDataFields.image,
-                          fields.InputDataFields.groundtruth_classes,
+           fields.InputDataFields.groundtruth_boxes,
-                          groundtruth_label_weights,
+           fields.InputDataFields.groundtruth_classes,
-                          groundtruth_label_confidences, multiclass_scores,
+           groundtruth_label_weights, groundtruth_label_confidences,
-                          groundtruth_instance_masks, groundtruth_keypoints,
+           multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints,
-                          groundtruth_keypoint_visibilities,
+           groundtruth_keypoint_visibilities, groundtruth_dp_num_points,
-                          groundtruth_dp_num_points, groundtruth_dp_part_ids,
+           groundtruth_dp_part_ids, groundtruth_dp_surface_coords),
-                          groundtruth_dp_surface_coords),
      random_pad_image:
          (fields.InputDataFields.image,
           fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
@@ -4361,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
           fields.InputDataFields.groundtruth_classes,
           groundtruth_label_weights, groundtruth_instance_masks,
           groundtruth_keypoints),
+      random_scale_crop_and_pad_to_square:
+          (fields.InputDataFields.image,
+           fields.InputDataFields.groundtruth_boxes,
+           fields.InputDataFields.groundtruth_classes,
+           groundtruth_label_weights, groundtruth_instance_masks,
+           groundtruth_keypoints),
  }
  return prep_func_arg_map

--- a/research/object_detection/core/preprocessor_test.py
+++ b/research/object_detection/core/preprocessor_test.py
@@ -712,76 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
                                test_masks=True,
                                test_keypoints=True)
-  @parameterized.parameters(
-      {'include_dense_pose': False},
-      {'include_dense_pose': True}
-  )
-  def testRunRandomHorizontalFlipWithMaskAndKeypoints(self, include_dense_pose):
-    def graph_fn():
-      preprocess_options = [(preprocessor.random_horizontal_flip, {})]
-      image_height = 3
-      image_width = 3
-      images = tf.random_uniform([1, image_height, image_width, 3])
-      boxes = self.createTestBoxes()
-      masks = self.createTestMasks()
-      keypoints, keypoint_visibilities = self.createTestKeypoints()
-      dp_num_point, dp_part_ids, dp_surface_coords = self.createTestDensePose()
-      keypoint_flip_permutation = self.createKeypointFlipPermutation()
-      tensor_dict = {
-          fields.InputDataFields.image:
-              images,
-          fields.InputDataFields.groundtruth_boxes:
-              boxes,
-          fields.InputDataFields.groundtruth_instance_masks:
-              masks,
-          fields.InputDataFields.groundtruth_keypoints:
-              keypoints,
-          fields.InputDataFields.groundtruth_keypoint_visibilities:
-              keypoint_visibilities
-      }
-      if include_dense_pose:
-        tensor_dict.update({
-            fields.InputDataFields.groundtruth_dp_num_points: dp_num_point,
-            fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
-            fields.InputDataFields.groundtruth_dp_surface_coords:
-                dp_surface_coords
-        })
-      preprocess_options = [(preprocessor.random_horizontal_flip, {
-          'keypoint_flip_permutation': keypoint_flip_permutation
-      })]
-      preprocessor_arg_map = preprocessor.get_default_func_arg_map(
-          include_instance_masks=True,
-          include_keypoints=True,
-          include_keypoint_visibilities=True,
-          include_dense_pose=include_dense_pose)
-      tensor_dict = preprocessor.preprocess(
-          tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
-      boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-      masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
-      keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
-      keypoint_visibilities = tensor_dict[
-          fields.InputDataFields.groundtruth_keypoint_visibilities]
-      output_tensors = [boxes, masks, keypoints, keypoint_visibilities]
-      if include_dense_pose:
-        dp_num_points = tensor_dict[
-            fields.InputDataFields.groundtruth_dp_num_points]
-        dp_part_ids = tensor_dict[
-            fields.InputDataFields.groundtruth_dp_part_ids]
-        dp_surface_coords = tensor_dict[
-            fields.InputDataFields.groundtruth_dp_surface_coords]
-        output_tensors.extend([dp_num_points, dp_part_ids, dp_surface_coords])
-      return output_tensors
-    output_tensors = self.execute_cpu(graph_fn, [])
-    self.assertIsNotNone(output_tensors[0])  # Boxes.
-    self.assertIsNotNone(output_tensors[1])  # Masks.
-    self.assertIsNotNone(output_tensors[2])  # Keypoints
-    self.assertIsNotNone(output_tensors[3])  # Keypoint Visibilities.
-    if include_dense_pose:
-      self.assertIsNotNone(output_tensors[4])  # DensePose Num Points.
-      self.assertIsNotNone(output_tensors[5])  # DensePose Part IDs.
-      self.assertIsNotNone(output_tensors[6])  # DensePose Surface Coords
  def testRandomVerticalFlip(self):
@@ -2380,7 +2310,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
  @parameterized.parameters(
      {'include_dense_pose': False},
-      {'include_dense_pose': True}
  )
  def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
    def graph_fn():
@@ -3912,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
    size = max(image.shape)
    self.assertAlmostEqual(scale * 256.0, size)
+    self.assertAllClose(image[:, :, 0], masks[0, :, :])
+  @parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
+                                  ('scale_2_0', 2.0))
+  def test_random_scale_crop_and_pad_to_square(self, scale):
+    def graph_fn():
+      image = np.random.randn(512, 256, 1)
+      box_centers = [0.25, 0.5, 0.75]
+      box_size = 0.1
+      box_corners = []
+      box_labels = []
+      box_label_weights = []
+      keypoints = []
+      masks = []
+      for center_y in box_centers:
+        for center_x in box_centers:
+          box_corners.append(
+              [center_y - box_size / 2.0, center_x - box_size / 2.0,
+               center_y + box_size / 2.0, center_x + box_size / 2.0])
+          box_labels.append([1])
+          box_label_weights.append([1.])
+          keypoints.append(
+              [[center_y - box_size / 2.0, center_x - box_size / 2.0],
+               [center_y + box_size / 2.0, center_x + box_size / 2.0]])
+          masks.append(image[:, :, 0].reshape(512, 256))
+      image = tf.constant(image)
+      boxes = tf.constant(box_corners)
+      labels = tf.constant(box_labels)
+      label_weights = tf.constant(box_label_weights)
+      keypoints = tf.constant(keypoints)
+      masks = tf.constant(np.stack(masks))
+      (new_image, new_boxes, _, _, new_masks,
+       new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
+           image,
+           boxes,
+           labels,
+           label_weights,
+           masks=masks,
+           keypoints=keypoints,
+           scale_min=scale,
+           scale_max=scale,
+           output_size=512)
+      return new_image, new_boxes, new_masks, new_keypoints
+    image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])
+    # Since random_scale_crop_and_pad_to_square may prune and clip boxes,
+    # we only need to find one of the boxes that was not clipped and check
+    # that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
+    # is equivalent to round(a-b, 7) == 0.
+    any_box_has_correct_size = False
+    effective_scale_y = int(scale * 512) / 512.0
+    effective_scale_x = int(scale * 256) / 512.0
+    expected_size_y = 0.1 * effective_scale_y
+    expected_size_x = 0.1 * effective_scale_x
+    for box in boxes:
+      ymin, xmin, ymax, xmax = box
+      any_box_has_correct_size |= (
+          (round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
+          (round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
+          (round((ymax - ymin) - expected_size_y, 7) == 0.0) and
+          (round((xmax - xmin) - expected_size_x, 7) == 0.0))
+    self.assertTrue(any_box_has_correct_size)
+    # Similar to the approach above where we check for at least one box with the
+    # expected dimensions, we check for at least one pair of keypoints whose
+    # distance matches the expected dimensions.
+    any_keypoint_pair_has_correct_dist = False
+    for keypoint_pair in keypoints:
+      ymin, xmin = keypoint_pair[0]
+      ymax, xmax = keypoint_pair[1]
+      any_keypoint_pair_has_correct_dist |= (
+          (round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
+          (round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
+          (round((ymax - ymin) - expected_size_y, 7) == 0.0) and
+          (round((xmax - xmin) - expected_size_x, 7) == 0.0))
+    self.assertTrue(any_keypoint_pair_has_correct_dist)
+    self.assertAlmostEqual(512.0, image.shape[0])
+    self.assertAlmostEqual(512.0, image.shape[1])
    self.assertAllClose(image[:, :, 0],
                        masks[0, :, :])

--- a/research/object_detection/core/standard_fields.py
+++ b/research/object_detection/core/standard_fields.py
@@ -141,6 +141,8 @@ class DetectionResultFields(object):
      for detection boxes in the image including background class.
    detection_classes: detection-level class labels.
    detection_masks: contains a segmentation mask for each detection box.
+    detection_surface_coords: contains DensePose surface coordinates for each
+      box.
    detection_boundaries: contains an object boundary for each detection box.
    detection_keypoints: contains detection keypoints for each detection box.
    detection_keypoint_scores: contains detection keypoint scores.
@@ -161,6 +163,7 @@ class DetectionResultFields(object):
  detection_features = 'detection_features'
  detection_classes = 'detection_classes'
  detection_masks = 'detection_masks'
+  detection_surface_coords = 'detection_surface_coords'
  detection_boundaries = 'detection_boundaries'
  detection_keypoints = 'detection_keypoints'
  detection_keypoint_scores = 'detection_keypoint_scores'
@@ -182,7 +185,11 @@ class BoxListFields(object):
    masks: masks per bounding box.
    boundaries: boundaries per bounding box.
    keypoints: keypoints per bounding box.
+    keypoint_visibilities: keypoint visibilities per bounding box.
    keypoint_heatmaps: keypoint heatmaps per bounding box.
+    densepose_num_points: number of DensePose points per bounding box.
+    densepose_part_ids: DensePose part ids per bounding box.
+    densepose_surface_coords: DensePose surface coordinates per bounding box.
    is_crowd: is_crowd annotation per bounding box.
  """
  boxes = 'boxes'
@@ -196,6 +203,9 @@ class BoxListFields(object):
  keypoints = 'keypoints'
  keypoint_visibilities = 'keypoint_visibilities'
  keypoint_heatmaps = 'keypoint_heatmaps'
+  densepose_num_points = 'densepose_num_points'
+  densepose_part_ids = 'densepose_part_ids'
+  densepose_surface_coords = 'densepose_surface_coords'
  is_crowd = 'is_crowd'
  group_of = 'group_of'

--- a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py
+++ b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py
@@ -50,13 +50,16 @@ import io
 import itertools
 import json
 import os
-import apache_beam as beam
 import numpy as np
 import PIL.Image
 import six
 import tensorflow.compat.v1 as tf
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
 class ReKeyDataFn(beam.DoFn):
  """Re-keys tfrecords by sequence_key.

--- a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py
+++ b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py
@@ -22,7 +22,7 @@ import datetime
 import os
 import tempfile
 import unittest
-import apache_beam as beam
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
@@ -31,6 +31,12 @@ from object_detection.dataset_tools.context_rcnn import add_context_to_examples
 from object_detection.utils import tf_version
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
 @contextlib.contextmanager
 def InMemoryTFRecord(entries):
  temp = tempfile.NamedTemporaryFile(delete=False)

--- a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py
+++ b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py
@@ -39,12 +39,16 @@ import io
 import json
 import logging
 import os
-import apache_beam as beam
 import numpy as np
 import PIL.Image
 import tensorflow.compat.v1 as tf
 from object_detection.utils import dataset_util
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
 class ParseImage(beam.DoFn):
  """A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""

--- a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py
+++ b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py
@@ -22,7 +22,6 @@ import os
 import tempfile
 import unittest
-import apache_beam as beam
 import numpy as np
 from PIL import Image
@@ -30,6 +29,11 @@ import tensorflow.compat.v1 as tf
 from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
 from object_detection.utils import tf_version
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
 @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):

--- a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py
@@ -48,8 +48,11 @@ from __future__ import print_function
 import argparse
 import os
 import threading
-import apache_beam as beam
 import tensorflow.compat.v1 as tf
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
 class GenerateDetectionDataFn(beam.DoFn):

--- a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
@@ -22,7 +22,6 @@ import contextlib
 import os
 import tempfile
 import unittest
-import apache_beam as beam
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
@@ -39,6 +38,11 @@ if six.PY2:
 else:
  mock = unittest.mock
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
 class FakeModel(model.DetectionModel):
  """A Fake Detection model with expected output nodes from post-processing."""

--- a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py
@@ -34,7 +34,8 @@ python tensorflow_models/object_detection/export_inference_graph.py \
    --input_type tf_example \
    --pipeline_config_path path/to/faster_rcnn_model.config \
    --trained_checkpoint_prefix path/to/model.ckpt \
-    --output_directory path/to/exported_model_directory
+    --output_directory path/to/exported_model_directory \
+    --additional_output_tensor_names detection_features
 python generate_embedding_data.py \
    --alsologtostderr \
@@ -52,11 +53,15 @@ import datetime
 import os
 import threading
-import apache_beam as beam
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
 class GenerateEmbeddingDataFn(beam.DoFn):
  """Generates embedding data for camera trap images.

--- a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py
@@ -21,7 +21,6 @@ import contextlib
 import os
 import tempfile
 import unittest
-import apache_beam as beam
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
@@ -38,6 +37,11 @@ if six.PY2:
 else:
  mock = unittest.mock
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass
 class FakeModel(model.DetectionModel):
  """A Fake Detection model with expected output nodes from post-processing."""

--- a/research/object_detection/dataset_tools/oid_tfrecord_creation.py
+++ b/research/object_detection/dataset_tools/oid_tfrecord_creation.py
@@ -51,25 +51,25 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
  feature_map = {
      standard_fields.TfExampleFields.object_bbox_ymin:
          dataset_util.float_list_feature(
-              filtered_data_frame_boxes.YMin.as_matrix()),
+              filtered_data_frame_boxes.YMin.to_numpy()),
      standard_fields.TfExampleFields.object_bbox_xmin:
          dataset_util.float_list_feature(
-              filtered_data_frame_boxes.XMin.as_matrix()),
+              filtered_data_frame_boxes.XMin.to_numpy()),
      standard_fields.TfExampleFields.object_bbox_ymax:
          dataset_util.float_list_feature(
-              filtered_data_frame_boxes.YMax.as_matrix()),
+              filtered_data_frame_boxes.YMax.to_numpy()),
      standard_fields.TfExampleFields.object_bbox_xmax:
          dataset_util.float_list_feature(
-              filtered_data_frame_boxes.XMax.as_matrix()),
+              filtered_data_frame_boxes.XMax.to_numpy()),
      standard_fields.TfExampleFields.object_class_text:
          dataset_util.bytes_list_feature([
              six.ensure_binary(label_text)
-              for label_text in filtered_data_frame_boxes.LabelName.as_matrix()
+              for label_text in filtered_data_frame_boxes.LabelName.to_numpy()
          ]),
      standard_fields.TfExampleFields.object_class_label:
          dataset_util.int64_list_feature(
              filtered_data_frame_boxes.LabelName.map(
-                  lambda x: label_map[x]).as_matrix()),
+                  lambda x: label_map[x]).to_numpy()),
      standard_fields.TfExampleFields.filename:
          dataset_util.bytes_feature(
              six.ensure_binary('{}.jpg'.format(image_id))),
@@ -82,31 +82,31 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
  if 'IsGroupOf' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_group_of] = dataset_util.int64_list_feature(
-                    filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int))
+                    filtered_data_frame_boxes.IsGroupOf.to_numpy().astype(int))
  if 'IsOccluded' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_occluded] = dataset_util.int64_list_feature(
-                    filtered_data_frame_boxes.IsOccluded.as_matrix().astype(
+                    filtered_data_frame_boxes.IsOccluded.to_numpy().astype(
                        int))
  if 'IsTruncated' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_truncated] = dataset_util.int64_list_feature(
-                    filtered_data_frame_boxes.IsTruncated.as_matrix().astype(
+                    filtered_data_frame_boxes.IsTruncated.to_numpy().astype(
                        int))
  if 'IsDepiction' in filtered_data_frame.columns:
    feature_map[standard_fields.TfExampleFields.
                object_depiction] = dataset_util.int64_list_feature(
-                    filtered_data_frame_boxes.IsDepiction.as_matrix().astype(
+                    filtered_data_frame_boxes.IsDepiction.to_numpy().astype(
                        int))
  if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
    feature_map[standard_fields.TfExampleFields.
                image_class_label] = dataset_util.int64_list_feature(
                    filtered_data_frame_labels.LabelName.map(
-                        lambda x: label_map[x]).as_matrix())
+                        lambda x: label_map[x]).to_numpy())
    feature_map[standard_fields.TfExampleFields
                .image_class_text] = dataset_util.bytes_list_feature([
                    six.ensure_binary(label_text) for label_text in
-                    filtered_data_frame_labels.LabelName.as_matrix()
+                    filtered_data_frame_labels.LabelName.to_numpy()
                ]),
  return tf.train.Example(features=tf.train.Features(feature=feature_map))
--- a/research/object_detection/eval_util.py
+++ b/research/object_detection/eval_util.py
@@ -552,7 +552,11 @@ def _resize_detection_masks(args):
  detection_boxes, detection_masks, image_shape = args
  detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
      detection_masks, detection_boxes, image_shape[0], image_shape[1])
-  return tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)
+  # If the masks are currently float, binarize them. Otherwise keep them as
+  # integers, since they have already been thresholded.
+  if detection_masks_reframed.dtype == tf.float32:
+    detection_masks_reframed = tf.greater(detection_masks_reframed, 0.5)
+  return tf.cast(detection_masks_reframed, tf.uint8)
 def _resize_groundtruth_masks(args):
@@ -570,6 +574,17 @@ def _resize_groundtruth_masks(args):
  return tf.cast(tf.squeeze(mask, 3), tf.uint8)
+def _resize_surface_coordinate_masks(args):
+  """Reframes per-box surface coordinates with the box-mask reframing op.
+
+  Args:
+    args: A sequence of (detection_boxes, surface_coords, image_shape).
+      surface_coords must have exactly two entries along its last axis (they
+      are unstacked as v, u). image_shape[0] and image_shape[1] are forwarded
+      to the reframing op (presumably image height and width -- TODO confirm).
+
+  Returns:
+    The v and u channels, each passed through
+    ops.reframe_box_masks_to_image_masks, stacked along the last axis.
+  """
+  detection_boxes, surface_coords, image_shape = args
+  # Each channel is reframed on its own and the results are restacked.
+  surface_coords_v, surface_coords_u = tf.unstack(surface_coords, axis=-1)
+  surface_coords_v_reframed = ops.reframe_box_masks_to_image_masks(
+      surface_coords_v, detection_boxes, image_shape[0], image_shape[1])
+  surface_coords_u_reframed = ops.reframe_box_masks_to_image_masks(
+      surface_coords_u, detection_boxes, image_shape[0], image_shape[1])
+  return tf.stack([surface_coords_v_reframed, surface_coords_u_reframed],
+                  axis=-1)
 def _scale_keypoint_to_absolute(args):
  keypoints, image_shape = args
  return keypoint_ops.scale(keypoints, image_shape[0], image_shape[1])
@@ -720,6 +735,12 @@ def result_dict_for_batched_example(images,
        num_keypoints] bool tensor with keypoint visibilities (Optional).
      'groundtruth_labeled_classes': [batch_size, num_classes] int64
        tensor of 1-indexed classes. (Optional)
+      'groundtruth_dp_num_points': [batch_size, max_number_of_boxes] int32
+        tensor. (Optional)
+      'groundtruth_dp_part_ids': [batch_size, max_number_of_boxes,
+        max_sampled_points] int32 tensor. (Optional)
+      'groundtruth_dp_surface_coords_list': [batch_size, max_number_of_boxes,
+        max_sampled_points, 4] float32 tensor. (Optional)
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
@@ -747,12 +768,16 @@ def result_dict_for_batched_example(images,
    'detection_scores': [batch_size, max_detections] float32 tensor of scores.
    'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
      classes.
-    'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
+    'detection_masks': [batch_size, max_detections, H, W] uint8 tensor of
-      binarized masks, reframed to full image masks. (Optional)
+      instance masks, reframed to full image masks. Note that these may be
+      binarized (e.g. {0, 1}), or may contain 1-indexed part labels. (Optional)
    'detection_keypoints': [batch_size, max_detections, num_keypoints, 2]
      float32 tensor containing keypoint coordinates. (Optional)
    'detection_keypoint_scores': [batch_size, max_detections, num_keypoints]
      float32 tensor containing keypoint scores. (Optional)
+    'detection_surface_coords': [batch_size, max_detections, H, W, 2] float32
+      tensor with normalized surface coordinates (e.g. DensePose UV
+      coordinates). (Optional)
    'num_detections': [batch_size] int64 tensor containing number of valid
      detections.
    'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
@@ -844,14 +869,21 @@ def result_dict_for_batched_example(images,
  if detection_fields.detection_masks in detections:
    detection_masks = detections[detection_fields.detection_masks]
-    # TODO(rathodv): This should be done in model's postprocess
-    # function ideally.
    output_dict[detection_fields.detection_masks] = (
        shape_utils.static_or_dynamic_map_fn(
            _resize_detection_masks,
            elems=[detection_boxes, detection_masks,
                   original_image_spatial_shapes],
            dtype=tf.uint8))
+    if detection_fields.detection_surface_coords in detections:
+      detection_surface_coords = detections[
+          detection_fields.detection_surface_coords]
+      output_dict[detection_fields.detection_surface_coords] = (
+          shape_utils.static_or_dynamic_map_fn(
+              _resize_surface_coordinate_masks,
+              elems=[detection_boxes, detection_surface_coords,
+                     original_image_spatial_shapes],
+              dtype=tf.float32))
  if detection_fields.detection_keypoints in detections:
    detection_keypoints = detections[detection_fields.detection_keypoints]
@@ -1074,3 +1106,8 @@ def evaluator_options_from_eval_config(eval_config):
          'recall_upper_bound': (eval_config.recall_upper_bound)
      }
  return evaluator_options
+def has_densepose(eval_dict):
+  """Returns True if eval_dict has both detection masks and surface coords.
+
+  Args:
+    eval_dict: A dictionary (or other container supporting `in`) keyed by
+      fields.DetectionResultFields names.
+
+  Returns:
+    True only when both the detection_masks and detection_surface_coords
+    keys are present in eval_dict.
+  """
+  return (fields.DetectionResultFields.detection_masks in eval_dict and
+          fields.DetectionResultFields.detection_surface_coords in eval_dict)
--- a/research/object_detection/meta_architectures/center_net_meta_arch.py
+++ b/research/object_detection/meta_architectures/center_net_meta_arch.py
--- a/research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py
+++ b/research/object_detection/meta_architectures/center_net_meta_arch_tf2_test.py
@@ -266,7 +266,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
      masks_np[0, :, :3, 1] = 1  # Class 1.
      masks = tf.constant(masks_np)
      true_image_shapes = tf.constant([[6, 8, 3]])
-      instance_masks = cnma.convert_strided_predictions_to_instance_masks(
+      instance_masks, _ = cnma.convert_strided_predictions_to_instance_masks(
          boxes, classes, masks, stride=2, mask_height=2, mask_width=2,
          true_image_shapes=true_image_shapes)
      return instance_masks
@@ -289,6 +289,104 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
        ])
    np.testing.assert_array_equal(expected_instance_masks, instance_masks)
+  def test_convert_strided_predictions_raises_error_with_one_tensor(self):
+    """Checks that a part heatmap without surface coords raises ValueError."""
+    def graph_fn():
+      boxes = tf.constant(
+          [
+              [[0.5, 0.5, 1.0, 1.0],
+               [0.0, 0.5, 0.5, 1.0],
+               [0.0, 0.0, 0.0, 0.0]],
+          ], tf.float32)
+      classes = tf.constant(
+          [
+              [0, 1, 0],
+          ], tf.int32)
+      masks_np = np.zeros((1, 4, 4, 2), dtype=np.float32)
+      masks_np[0, :, 2:, 0] = 1  # Class 0.
+      masks_np[0, :, :3, 1] = 1  # Class 1.
+      masks = tf.constant(masks_np)
+      true_image_shapes = tf.constant([[6, 8, 3]])
+      densepose_part_heatmap = tf.random.uniform(
+          [1, 4, 4, 24])
+      # Only one of the two DensePose tensors is supplied; the surface
+      # coordinates are explicitly None.
+      instance_masks, _ = cnma.convert_strided_predictions_to_instance_masks(
+          boxes, classes, masks, true_image_shapes,
+          densepose_part_heatmap=densepose_part_heatmap,
+          densepose_surface_coords=None)
+      return instance_masks
+    with self.assertRaises(ValueError):
+      self.execute_cpu(graph_fn, [])
+  def test_crop_and_threshold_masks(self):
+    """Tests crop_and_threshold_masks with one DensePose and one plain box.
+
+    The first box has class 0, which equals densepose_class_index, and is
+    expected to yield 1-indexed part labels plus surface coordinates. The
+    second box has class 2 and is expected to yield a binary mask and
+    all-zero surface coordinates.
+    """
+    boxes_np = np.array(
+        [[0., 0., 0.5, 0.5],
+         [0.25, 0.25, 1.0, 1.0]], dtype=np.float32)
+    classes_np = np.array([0, 2], dtype=np.int32)
+    masks_np = np.zeros((4, 4, _NUM_CLASSES), dtype=np.float32)
+    masks_np[0, 0, 0] = 0.8
+    masks_np[1, 1, 0] = 0.6
+    masks_np[3, 3, 2] = 0.7
+    part_heatmap_np = np.zeros((4, 4, _DENSEPOSE_NUM_PARTS), dtype=np.float32)
+    part_heatmap_np[0, 0, 4] = 1
+    part_heatmap_np[0, 0, 2] = 0.6  # Lower scoring.
+    part_heatmap_np[1, 1, 8] = 0.2
+    part_heatmap_np[3, 3, 4] = 0.5
+    surf_coords_np = np.zeros((4, 4, 2 * _DENSEPOSE_NUM_PARTS),
+                              dtype=np.float32)
+    surf_coords_np[:, :, 8:10] = 0.2, 0.9
+    surf_coords_np[:, :, 16:18] = 0.3, 0.5
+    true_height, true_width = 10, 10
+    input_height, input_width = 10, 10
+    mask_height = 4
+    mask_width = 4
+    def graph_fn():
+      elems = [
+          tf.constant(boxes_np),
+          tf.constant(classes_np),
+          tf.constant(masks_np),
+          tf.constant(part_heatmap_np),
+          tf.constant(surf_coords_np),
+          tf.constant(true_height, dtype=tf.int32),
+          tf.constant(true_width, dtype=tf.int32)
+      ]
+      part_masks, surface_coords = cnma.crop_and_threshold_masks(
+          elems, input_height, input_width, mask_height=mask_height,
+          mask_width=mask_width, densepose_class_index=0)
+      return part_masks, surface_coords
+    part_masks, surface_coords = self.execute_cpu(graph_fn, [])
+    expected_part_masks = np.zeros((2, 4, 4), dtype=np.uint8)
+    expected_part_masks[0, 0, 0] = 5  # Recall classes are 1-indexed in output.
+    expected_part_masks[0, 2, 2] = 9  # Recall classes are 1-indexed in output.
+    expected_part_masks[1, 3, 3] = 1  # Standard instance segmentation mask.
+    expected_surface_coords = np.zeros((2, 4, 4, 2), dtype=np.float32)
+    expected_surface_coords[0, 0, 0, :] = 0.2, 0.9
+    expected_surface_coords[0, 2, 2, :] = 0.3, 0.5
+    np.testing.assert_allclose(expected_part_masks, part_masks)
+    np.testing.assert_allclose(expected_surface_coords, surface_coords)
+  def test_gather_surface_coords_for_parts(self):
+    """Tests gathering surface coords for the selected part at each pixel."""
+    surface_coords_cropped_np = np.zeros((2, 5, 5, _DENSEPOSE_NUM_PARTS, 2),
+                                         dtype=np.float32)
+    surface_coords_cropped_np[0, 0, 0, 5] = 0.3, 0.4
+    surface_coords_cropped_np[0, 1, 0, 9] = 0.5, 0.6
+    # The part indices below select exactly the two non-zero entries above.
+    highest_scoring_part_np = np.zeros((2, 5, 5), dtype=np.int32)
+    highest_scoring_part_np[0, 0, 0] = 5
+    highest_scoring_part_np[0, 1, 0] = 9
+    def graph_fn():
+      surface_coords_cropped = tf.constant(surface_coords_cropped_np,
+                                           tf.float32)
+      highest_scoring_part = tf.constant(highest_scoring_part_np, tf.int32)
+      surface_coords_gathered = cnma.gather_surface_coords_for_parts(
+          surface_coords_cropped, highest_scoring_part)
+      return surface_coords_gathered
+    surface_coords_gathered = self.execute_cpu(graph_fn, [])
+    np.testing.assert_allclose([0.3, 0.4], surface_coords_gathered[0, 0, 0])
+    np.testing.assert_allclose([0.5, 0.6], surface_coords_gathered[0, 1, 0])
  def test_top_k_feature_map_locations(self):
    feature_map_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
    feature_map_np[0, 2, 0, 1] = 1.0
@@ -535,6 +633,8 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
    keypoint_heatmap_np[1, 0, 1, 1] = 0.9
    keypoint_heatmap_np[1, 2, 0, 1] = 0.8
+    # Note that the keypoint offsets are now per keypoint (as opposed to
+    # keypoint agnostic, in the test test_keypoint_candidate_prediction).
    keypoint_heatmap_offsets_np = np.zeros((2, 3, 3, 4), dtype=np.float32)
    keypoint_heatmap_offsets_np[0, 0, 0] = [0.5, 0.25, 0.0, 0.0]
    keypoint_heatmap_offsets_np[0, 2, 1] = [-0.25, 0.5, 0.0, 0.0]
@@ -949,6 +1049,7 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
 _NUM_CLASSES = 10
 _KEYPOINT_INDICES = [0, 1, 2, 3]
 _NUM_KEYPOINTS = len(_KEYPOINT_INDICES)
+_DENSEPOSE_NUM_PARTS = 24
 _TASK_NAME = 'human_pose'
@@ -991,6 +1092,20 @@ def get_fake_mask_params():
      mask_width=4)
+def get_fake_densepose_params():
+  """Returns the fake DensePose estimation parameter namedtuple.
+
+  The part, coordinate and task loss weights are all 1.0, and num_parts is
+  _DENSEPOSE_NUM_PARTS.
+  """
+  return cnma.DensePoseParams(
+      class_id=1,
+      classification_loss=losses.WeightedSoftmaxClassificationLoss(),
+      localization_loss=losses.L1LocalizationLoss(),
+      part_loss_weight=1.0,
+      coordinate_loss_weight=1.0,
+      num_parts=_DENSEPOSE_NUM_PARTS,
+      task_loss_weight=1.0,
+      upsample_to_input_res=True,
+      upsample_method='nearest')
 def build_center_net_meta_arch(build_resnet=False):
  """Builds the CenterNet meta architecture."""
  if build_resnet:
@@ -1018,7 +1133,8 @@ def build_center_net_meta_arch(build_resnet=False):
      object_center_params=get_fake_center_params(),
      object_detection_params=get_fake_od_params(),
      keypoint_params_dict={_TASK_NAME: get_fake_kp_params()},
-      mask_params=get_fake_mask_params())
+      mask_params=get_fake_mask_params(),
+      densepose_params=get_fake_densepose_params())
 def _logit(p):
@@ -1102,6 +1218,16 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
        fake_feature_map)
    self.assertEqual((4, 128, 128, _NUM_CLASSES), output.shape)
+    # "densepose parts" head:
+    output = model._prediction_head_dict[cnma.DENSEPOSE_HEATMAP][-1](
+        fake_feature_map)
+    self.assertEqual((4, 128, 128, _DENSEPOSE_NUM_PARTS), output.shape)
+    # "densepose surface coordinates" head:
+    output = model._prediction_head_dict[cnma.DENSEPOSE_REGRESSION][-1](
+        fake_feature_map)
+    self.assertEqual((4, 128, 128, 2 * _DENSEPOSE_NUM_PARTS), output.shape)
  def test_initialize_target_assigners(self):
    model = build_center_net_meta_arch()
    assigner_dict = model._initialize_target_assigners(
@@ -1125,6 +1251,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
    self.assertIsInstance(assigner_dict[cnma.SEGMENTATION_TASK],
                          cn_assigner.CenterNetMaskTargetAssigner)
+    # DensePose estimation target assigner:
+    self.assertIsInstance(assigner_dict[cnma.DENSEPOSE_TASK],
+                          cn_assigner.CenterNetDensePoseTargetAssigner)
  def test_predict(self):
    """Test the predict function."""
@@ -1145,6 +1275,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
                     (2, 32, 32, 2))
    self.assertEqual(prediction_dict[cnma.SEGMENTATION_HEATMAP][0].shape,
                     (2, 32, 32, _NUM_CLASSES))
+    self.assertEqual(prediction_dict[cnma.DENSEPOSE_HEATMAP][0].shape,
+                     (2, 32, 32, _DENSEPOSE_NUM_PARTS))
+    self.assertEqual(prediction_dict[cnma.DENSEPOSE_REGRESSION][0].shape,
+                     (2, 32, 32, 2 * _DENSEPOSE_NUM_PARTS))
  def test_loss(self):
    """Test the loss function."""
@@ -1157,7 +1291,13 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
        groundtruth_keypoints_list=groundtruth_dict[
            fields.BoxListFields.keypoints],
        groundtruth_masks_list=groundtruth_dict[
-            fields.BoxListFields.masks])
+            fields.BoxListFields.masks],
+        groundtruth_dp_num_points_list=groundtruth_dict[
+            fields.BoxListFields.densepose_num_points],
+        groundtruth_dp_part_ids_list=groundtruth_dict[
+            fields.BoxListFields.densepose_part_ids],
+        groundtruth_dp_surface_coords_list=groundtruth_dict[
+            fields.BoxListFields.densepose_surface_coords])
    prediction_dict = get_fake_prediction_dict(
        input_height=16, input_width=32, stride=4)
@@ -1193,6 +1333,12 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
    self.assertGreater(
        0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
                                   cnma.SEGMENTATION_HEATMAP)])
+    self.assertGreater(
+        0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
+                                   cnma.DENSEPOSE_HEATMAP)])
+    self.assertGreater(
+        0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
+                                   cnma.DENSEPOSE_REGRESSION)])
  @parameterized.parameters(
      {'target_class_id': 1},
@@ -1230,6 +1376,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
    segmentation_heatmap[:, 14:18, 14:18, target_class_id] = 1.0
    segmentation_heatmap = _logit(segmentation_heatmap)
+    dp_part_ind = 4
+    dp_part_heatmap = np.zeros((1, 32, 32, _DENSEPOSE_NUM_PARTS),
+                               dtype=np.float32)
+    dp_part_heatmap[0, 14:18, 14:18, dp_part_ind] = 1.0
+    dp_part_heatmap = _logit(dp_part_heatmap)
+    dp_surf_coords = np.random.randn(1, 32, 32, 2 * _DENSEPOSE_NUM_PARTS)
    class_center = tf.constant(class_center)
    height_width = tf.constant(height_width)
    offset = tf.constant(offset)
@@ -1237,6 +1391,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
    keypoint_offsets = tf.constant(keypoint_offsets, dtype=tf.float32)
    keypoint_regression = tf.constant(keypoint_regression, dtype=tf.float32)
    segmentation_heatmap = tf.constant(segmentation_heatmap, dtype=tf.float32)
+    dp_part_heatmap = tf.constant(dp_part_heatmap, dtype=tf.float32)
+    dp_surf_coords = tf.constant(dp_surf_coords, dtype=tf.float32)
    prediction_dict = {
        cnma.OBJECT_CENTER: [class_center],
@@ -1249,6 +1405,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
        cnma.get_keypoint_name(_TASK_NAME, cnma.KEYPOINT_REGRESSION):
            [keypoint_regression],
        cnma.SEGMENTATION_HEATMAP: [segmentation_heatmap],
+        cnma.DENSEPOSE_HEATMAP: [dp_part_heatmap],
+        cnma.DENSEPOSE_REGRESSION: [dp_surf_coords]
    }
    def graph_fn():
@@ -1271,12 +1429,13 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
    self.assertAllEqual([1, max_detection, 4, 4],
                        detections['detection_masks'].shape)
-    # There should be some section of the first mask (correspond to the only
+    # Masks should be empty for everything but the first detection.
-    # detection) with non-zero mask values.
-    self.assertGreater(np.sum(detections['detection_masks'][0, 0, :, :] > 0), 0)
    self.assertAllEqual(
        detections['detection_masks'][0, 1:, :, :],
        np.zeros_like(detections['detection_masks'][0, 1:, :, :]))
+    self.assertAllEqual(
+        detections['detection_surface_coords'][0, 1:, :, :],
+        np.zeros_like(detections['detection_surface_coords'][0, 1:, :, :]))
    if target_class_id == 1:
      expected_kpts_for_obj_0 = np.array(
@@ -1287,6 +1446,12 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
                                 expected_kpts_for_obj_0, rtol=1e-6)
      np.testing.assert_allclose(detections['detection_keypoint_scores'][0][0],
                                 expected_kpt_scores_for_obj_0, rtol=1e-6)
+      # First detection has DensePose parts.
+      self.assertSameElements(
+          np.unique(detections['detection_masks'][0, 0, :, :]),
+          set([0, dp_part_ind + 1]))
+      self.assertGreater(np.sum(np.abs(detections['detection_surface_coords'])),
+                         0.0)
    else:
      # All keypoint outputs should be zeros.
      np.testing.assert_allclose(
@@ -1297,6 +1462,14 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
          detections['detection_keypoint_scores'][0][0],
          np.zeros([num_keypoints], np.float),
          rtol=1e-6)
+      # Binary segmentation mask.
+      self.assertSameElements(
+          np.unique(detections['detection_masks'][0, 0, :, :]),
+          set([0, 1]))
+      # No DensePose surface coordinates.
+      np.testing.assert_allclose(
+          detections['detection_surface_coords'][0, 0, :, :],
+          np.zeros_like(detections['detection_surface_coords'][0, 0, :, :]))
  def test_get_instance_indices(self):
    classes = tf.constant([[0, 1, 2, 0], [2, 1, 2, 2]], dtype=tf.int32)
@@ -1353,6 +1526,17 @@ def get_fake_prediction_dict(input_height, input_width, stride):
  mask_heatmap[0, 2, 4, 1] = 1.0
  mask_heatmap = _logit(mask_heatmap)
+  densepose_heatmap = np.zeros((2, output_height, output_width,
+                                _DENSEPOSE_NUM_PARTS), dtype=np.float32)
+  densepose_heatmap[0, 2, 4, 5] = 1.0
+  densepose_heatmap = _logit(densepose_heatmap)
+  densepose_regression = np.zeros((2, output_height, output_width,
+                                   2 * _DENSEPOSE_NUM_PARTS), dtype=np.float32)
+  # The surface coordinate indices for part index 5 are:
+  # (5 * 2, 5 * 2 + 1), or (10, 11).
+  densepose_regression[0, 2, 4, 10:12] = 0.4, 0.7
  prediction_dict = {
      'preprocessed_inputs':
          tf.zeros((2, input_height, input_width, 3)),
@@ -1383,6 +1567,14 @@ def get_fake_prediction_dict(input_height, input_width, stride):
      cnma.SEGMENTATION_HEATMAP: [
          tf.constant(mask_heatmap),
          tf.constant(mask_heatmap)
+      ],
+      cnma.DENSEPOSE_HEATMAP: [
+          tf.constant(densepose_heatmap),
+          tf.constant(densepose_heatmap),
+      ],
+      cnma.DENSEPOSE_REGRESSION: [
+          tf.constant(densepose_regression),
+          tf.constant(densepose_regression),
      ]
  }
  return prediction_dict
@@ -1427,12 +1619,30 @@ def get_fake_groundtruth_dict(input_height, input_width, stride):
      tf.constant(mask),
      tf.zeros_like(mask),
  ]
+  densepose_num_points = [
+      tf.constant([1], dtype=tf.int32),
+      tf.constant([0], dtype=tf.int32),
+  ]
+  densepose_part_ids = [
+      tf.constant([[5, 0, 0]], dtype=tf.int32),
+      tf.constant([[0, 0, 0]], dtype=tf.int32),
+  ]
+  densepose_surface_coords_np = np.zeros((1, 3, 4), dtype=np.float32)
+  densepose_surface_coords_np[0, 0, :] = 0.55, 0.55, 0.4, 0.7
+  densepose_surface_coords = [
+      tf.constant(densepose_surface_coords_np),
+      tf.zeros_like(densepose_surface_coords_np)
+  ]
  groundtruth_dict = {
      fields.BoxListFields.boxes: boxes,
      fields.BoxListFields.weights: weights,
      fields.BoxListFields.classes: classes,
      fields.BoxListFields.keypoints: keypoints,
      fields.BoxListFields.masks: masks,
+      fields.BoxListFields.densepose_num_points: densepose_num_points,
+      fields.BoxListFields.densepose_part_ids: densepose_part_ids,
+      fields.BoxListFields.densepose_surface_coords:
+          densepose_surface_coords,
      fields.InputDataFields.groundtruth_labeled_classes: labeled_classes,
  }
  return groundtruth_dict