ModelZoo / ResNet50_tensorflow · Commits

Commit 31ca3b97, authored Jul 23, 2020 by Kaushik Shivakumar

    resolve merge conflicts

Parents: 3e9d886d, 7fcd7cba

Showing 20 changed files with 2390 additions and 480 deletions (+2390, −480)
research/object_detection/core/densepose_ops_test.py  +178 −0
research/object_detection/core/model.py  +83 −18
research/object_detection/core/model_test.py  +3 −0
research/object_detection/core/preprocessor.py  +301 −21
research/object_detection/core/preprocessor_test.py  +198 −56
research/object_detection/core/standard_fields.py  +18 −0
research/object_detection/core/target_assigner.py  +254 −16
research/object_detection/core/target_assigner_test.py  +268 −0
research/object_detection/data_decoders/tf_example_decoder.py  +131 −1
research/object_detection/data_decoders/tf_example_decoder_test.py  +91 −6
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py  +231 −139
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py  +20 −8
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py  +75 −55
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py  +24 −15
research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py  +84 −51
research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py  +14 −5
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py  +96 −55
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py  +20 −10
research/object_detection/dataset_tools/create_coco_tf_record.py  +165 −14
research/object_detection/dataset_tools/create_coco_tf_record_test.py  +136 −10
research/object_detection/core/densepose_ops_test.py (new file, mode 100644)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for object_detection.core.densepose_ops."""
import numpy as np
import tensorflow.compat.v1 as tf

from object_detection.core import densepose_ops
from object_detection.utils import test_case


class DensePoseOpsTest(test_case.TestCase):
  """Tests for common DensePose operations."""

  def test_scale(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[0.0, 0.0, 0.1, 0.2], [100.0, 200.0, 0.3, 0.4]],
          [[50.0, 120.0, 0.5, 0.6], [100.0, 140.0, 0.7, 0.8]]
      ])
      y_scale = tf.constant(1.0 / 100)
      x_scale = tf.constant(1.0 / 200)

      output = densepose_ops.scale(dp_surface_coords, y_scale, x_scale)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[0., 0., 0.1, 0.2], [1.0, 1.0, 0.3, 0.4]],
        [[0.5, 0.6, 0.5, 0.6], [1.0, 0.7, 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_clip_to_window(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
          [[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
      ])
      window = tf.constant([0.25, 0.25, 0.75, 0.75])

      output = densepose_ops.clip_to_window(dp_surface_coords, window)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
        [[0.5, 0.25, 0.5, 0.6], [0.75, 0.75, 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_prune_outside_window(self):
    def graph_fn():
      dp_num_points = tf.constant([2, 0, 1])
      dp_part_ids = tf.constant([[1, 1], [0, 0], [16, 0]])
      dp_surface_coords = tf.constant([
          [[0.9, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
          [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
          [[0.8, 0.5, 0.6, 0.6], [0.5, 0.5, 0.7, 0.7]]
      ])
      window = tf.constant([0.25, 0.25, 0.75, 0.75])

      new_dp_num_points, new_dp_part_ids, new_dp_surface_coords = (
          densepose_ops.prune_outside_window(dp_num_points, dp_part_ids,
                                             dp_surface_coords, window))
      return new_dp_num_points, new_dp_part_ids, new_dp_surface_coords
    new_dp_num_points, new_dp_part_ids, new_dp_surface_coords = (
        self.execute_cpu(graph_fn, []))

    expected_dp_num_points = np.array([1, 0, 0])
    expected_dp_part_ids = np.array([[1], [0], [0]])
    expected_dp_surface_coords = np.array([
        [[0.75, 0.75, 0.3, 0.4]],
        [[0.0, 0.0, 0.0, 0.0]],
        [[0.0, 0.0, 0.0, 0.0]]
    ])
    self.assertAllEqual(new_dp_num_points, expected_dp_num_points)
    self.assertAllEqual(new_dp_part_ids, expected_dp_part_ids)
    self.assertAllClose(new_dp_surface_coords, expected_dp_surface_coords)

  def test_change_coordinate_frame(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
          [[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
      ])
      window = tf.constant([0.25, 0.25, 0.75, 0.75])

      output = densepose_ops.change_coordinate_frame(dp_surface_coords, window)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[0, 0.5, 0.1, 0.2], [1.0, 1.0, 0.3, 0.4]],
        [[0.5, -0.5, 0.5, 0.6], [1.5, 1.5, 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_to_normalized_coordinates(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[10., 30., 0.1, 0.2], [30., 45., 0.3, 0.4]],
          [[20., 0., 0.5, 0.6], [40., 60., 0.7, 0.8]]
      ])
      output = densepose_ops.to_normalized_coordinates(
          dp_surface_coords, 40, 60)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
        [[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_to_absolute_coordinates(self):
    def graph_fn():
      dp_surface_coords = tf.constant([
          [[0.25, 0.5, 0.1, 0.2], [0.75, 0.75, 0.3, 0.4]],
          [[0.5, 0.0, 0.5, 0.6], [1.0, 1.0, 0.7, 0.8]]
      ])
      output = densepose_ops.to_absolute_coordinates(
          dp_surface_coords, 40, 60)
      return output
    output = self.execute(graph_fn, [])

    expected_dp_surface_coords = np.array([
        [[10., 30., 0.1, 0.2], [30., 45., 0.3, 0.4]],
        [[20., 0., 0.5, 0.6], [40., 60., 0.7, 0.8]]
    ])
    self.assertAllClose(output, expected_dp_surface_coords)

  def test_horizontal_flip(self):
    part_ids_np = np.array([[1, 4], [0, 8]], dtype=np.int32)
    surf_coords_np = np.array([
        [[0.1, 0.7, 0.2, 0.4], [0.3, 0.8, 0.2, 0.4]],
        [[0.0, 0.5, 0.8, 0.7], [0.6, 1.0, 0.7, 0.9]],
    ], dtype=np.float32)
    def graph_fn():
      part_ids = tf.constant(part_ids_np, dtype=tf.int32)
      surf_coords = tf.constant(surf_coords_np, dtype=tf.float32)
      flipped_part_ids, flipped_surf_coords = densepose_ops.flip_horizontal(
          part_ids, surf_coords)
      flipped_twice_part_ids, flipped_twice_surf_coords = (
          densepose_ops.flip_horizontal(flipped_part_ids, flipped_surf_coords))
      return (flipped_part_ids, flipped_surf_coords,
              flipped_twice_part_ids, flipped_twice_surf_coords)
    (flipped_part_ids, flipped_surf_coords, flipped_twice_part_ids,
     flipped_twice_surf_coords) = self.execute(graph_fn, [])

    expected_flipped_part_ids = [[1, 5],  # 1->1, 4->5
                                 [0, 9]]  # 0->0, 8->9
    expected_flipped_surf_coords_yx = np.array([
        [[0.1, 1.0 - 0.7], [0.3, 1.0 - 0.8]],
        [[0.0, 1.0 - 0.5], [0.6, 1.0 - 1.0]],
    ], dtype=np.float32)
    self.assertAllEqual(expected_flipped_part_ids, flipped_part_ids)
    self.assertAllClose(expected_flipped_surf_coords_yx,
                        flipped_surf_coords[:, :, 0:2])
    self.assertAllEqual(part_ids_np, flipped_twice_part_ids)
    self.assertAllClose(surf_coords_np, flipped_twice_surf_coords,
                        rtol=1e-2, atol=1e-2)


if __name__ == '__main__':
  tf.test.main()
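The new test file pins down the DensePose coordinate conventions: each sampled point is a (y, x, v, u) row, and the ops transform only the image coordinates (y, x) while passing the surface coordinates (v, u) through. A minimal NumPy sketch of two of those transforms, written from the test expectations above rather than from the library source:

import numpy as np

def scale_sketch(dp_surface_coords, y_scale, x_scale):
  # Scale only the (y, x) image coordinates; (v, u) pass through.
  out = np.array(dp_surface_coords, dtype=np.float64)
  out[..., 0] *= y_scale
  out[..., 1] *= x_scale
  return out

def change_coordinate_frame_sketch(dp_surface_coords, window):
  # Re-express (y, x) relative to a [ymin, xmin, ymax, xmax] window.
  ymin, xmin, ymax, xmax = window
  out = np.array(dp_surface_coords, dtype=np.float64)
  out[..., 0] = (out[..., 0] - ymin) / (ymax - ymin)
  out[..., 1] = (out[..., 1] - xmin) / (xmax - xmin)
  return out

print(scale_sketch([[[100.0, 200.0, 0.3, 0.4]]], 1.0 / 100, 1.0 / 200))
# [[[1.0, 1.0, 0.3, 0.4]]], matching test_scale above.
print(change_coordinate_frame_sketch([[[0.5, 0.0, 0.5, 0.6]]],
                                     [0.25, 0.25, 0.75, 0.75]))
# [[[0.5, -0.5, 0.5, 0.6]]], matching test_change_coordinate_frame above.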
research/object_detection/core/model.py
@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     Args:
       field: a string key, options are
         fields.BoxListFields.{boxes,classes,masks,keypoints,
-        keypoint_visibilities} or
+        keypoint_visibilities, densepose_*} or
         fields.InputDataFields.is_annotated.

     Returns:
@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     Args:
       field: a string key, options are
         fields.BoxListFields.{boxes,classes,masks,keypoints,
-        keypoint_visibilities} or
+        keypoint_visibilities, densepose_*} or
         fields.InputDataFields.is_annotated.

     Returns:
@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
         detection_classes: [batch, max_detections]
           (If a model is producing class-agnostic detections, this field may be
           missing)
-        instance_masks: [batch, max_detections, image_height, image_width]
+        detection_masks: [batch, max_detections, mask_height, mask_width]
           (optional)
-        keypoints: [batch, max_detections, num_keypoints, 2] (optional)
+        detection_keypoints: [batch, max_detections, num_keypoints, 2]
+          (optional)
+        detection_keypoint_scores: [batch, max_detections, num_keypoints]
+          (optional)
+        detection_surface_coords: [batch, max_detections, mask_height,
+          mask_width, 2] (optional)
         num_detections: [batch]

       In addition to the above fields this stage also outputs the following
@@ -288,19 +293,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     """
     pass

-  def provide_groundtruth(self,
-                          groundtruth_boxes_list,
-                          groundtruth_classes_list,
-                          groundtruth_masks_list=None,
-                          groundtruth_keypoints_list=None,
-                          groundtruth_keypoint_visibilities_list=None,
-                          groundtruth_weights_list=None,
-                          groundtruth_confidences_list=None,
-                          groundtruth_is_crowd_list=None,
-                          groundtruth_group_of_list=None,
-                          groundtruth_area_list=None,
-                          is_annotated_list=None,
-                          groundtruth_labeled_classes=None):
+  def provide_groundtruth(self,
+                          groundtruth_boxes_list,
+                          groundtruth_classes_list,
+                          groundtruth_masks_list=None,
+                          groundtruth_keypoints_list=None,
+                          groundtruth_keypoint_visibilities_list=None,
+                          groundtruth_dp_num_points_list=None,
+                          groundtruth_dp_part_ids_list=None,
+                          groundtruth_dp_surface_coords_list=None,
+                          groundtruth_weights_list=None,
+                          groundtruth_confidences_list=None,
+                          groundtruth_is_crowd_list=None,
+                          groundtruth_group_of_list=None,
+                          groundtruth_area_list=None,
+                          is_annotated_list=None,
+                          groundtruth_labeled_classes=None):
     """Provide groundtruth tensors.

     Args:
@@ -324,6 +333,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
         `groundtruth_keypoint_visibilities_list`).
       groundtruth_keypoint_visibilities_list: a list of 3-D tf.bool tensors
         of shape [num_boxes, num_keypoints] containing keypoint visibilities.
+      groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
+        [num_boxes] containing the number of DensePose sampled points.
+      groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
+        [num_boxes, max_sampled_points] containing the DensePose part ids
+        (0-indexed) for each sampled point. Note that there may be padding.
+      groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
+        shape [num_boxes, max_sampled_points, 4] containing the DensePose
+        surface coordinates for each sampled point. Note that there may be
+        padding.
       groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
         [num_boxes] containing weights for groundtruth boxes.
       groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
@@ -361,6 +379,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
       self._groundtruth_lists[
          fields.BoxListFields.keypoint_visibilities] = (
              groundtruth_keypoint_visibilities_list)
+    if groundtruth_dp_num_points_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_num_points] = (
+              groundtruth_dp_num_points_list)
+    if groundtruth_dp_part_ids_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_part_ids] = (
+              groundtruth_dp_part_ids_list)
+    if groundtruth_dp_surface_coords_list:
+      self._groundtruth_lists[
+          fields.BoxListFields.densepose_surface_coords] = (
+              groundtruth_dp_surface_coords_list)
     if groundtruth_is_crowd_list:
       self._groundtruth_lists[
          fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
@@ -391,7 +421,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     pass

   @abc.abstractmethod
-  def restore_map(self, fine_tune_checkpoint_type='detection'):
+  def restore_map(self,
+                  fine_tune_checkpoint_type='detection',
+                  load_all_detection_checkpoint_vars=False):
     """Returns a map of variables to load from a foreign checkpoint.

     Returns a map of variable names to load from a checkpoint to variables in
@@ -407,6 +439,9 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
         checkpoint (with compatible variable names) or to restore from a
         classification checkpoint for initialization prior to training.
         Valid values: `detection`, `classification`. Default 'detection'.
+      load_all_detection_checkpoint_vars: whether to load all variables (when
+        `fine_tune_checkpoint_type` is `detection`). If False, only variables
+        within the feature extractor scope are included. Default False.

     Returns:
       A dict mapping variable names (to load from a checkpoint) to variables in
@@ -414,6 +449,36 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
     """
     pass

+  @abc.abstractmethod
+  def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
+    """Returns a map of variables to load from a foreign checkpoint.
+
+    Returns a dictionary of Tensorflow 2 Trackable objects (e.g. tf.Module
+    or Checkpoint). This enables the model to initialize based on weights from
+    another task. For example, the feature extractor variables from a
+    classification model can be used to bootstrap training of an object
+    detector. When loading from an object detection model, the checkpoint model
+    should have the same parameters as this detection model with exception of
+    the num_classes parameter.
+
+    Note that this function is intended to be used to restore Keras-based
+    models when running Tensorflow 2, whereas restore_map (above) is intended
+    to be used to restore Slim-based models when running Tensorflow 1.x.
+
+    TODO(jonathanhuang,rathodv): Check tf_version and raise unimplemented
+    error for both restore_map and restore_from_objects depending on version.
+
+    Args:
+      fine_tune_checkpoint_type: whether to restore from a full detection
+        checkpoint (with compatible variable names) or to restore from a
+        classification checkpoint for initialization prior to training.
+        Valid values: `detection`, `classification`. Default 'detection'.
+
+    Returns:
+      A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
+    """
+    pass
+
   @abc.abstractmethod
   def updates(self):
     """Returns a list of update operators for this model.
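For orientation, a hedged sketch of how a caller might use the extended signature. Here `my_model` stands in for any concrete DetectionModel subclass (a hypothetical name), the tensor values are made up, and the shapes follow the docstring above:

import tensorflow.compat.v1 as tf

# One image with two boxes; max_sampled_points == 2, so the second point of
# the second box is padding (dp_num_points says only 1 point is valid there).
dp_num_points_list = [tf.constant([2, 1], dtype=tf.int32)]
dp_part_ids_list = [tf.constant([[1, 5], [0, 0]], dtype=tf.int32)]
dp_surface_coords_list = [tf.constant(
    [[[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]],
     [[0.2, 0.3, 0.4, 0.5], [0.0, 0.0, 0.0, 0.0]]], dtype=tf.float32)]

my_model.provide_groundtruth(
    groundtruth_boxes_list=[tf.constant([[0., 0., 1., 1.], [.1, .1, .9, .9]])],
    groundtruth_classes_list=[tf.one_hot([0, 1], depth=2)],
    groundtruth_dp_num_points_list=dp_num_points_list,
    groundtruth_dp_part_ids_list=dp_part_ids_list,
    groundtruth_dp_surface_coords_list=dp_surface_coords_list)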
research/object_detection/core/model_test.py
@@ -57,6 +57,9 @@ class FakeModel(model.DetectionModel):
   def restore_map(self):
     return {}

+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    pass
+
   def regularization_losses(self):
     return []
research/object_detection/core/preprocessor.py
@@ -79,6 +79,7 @@ import tensorflow.compat.v1 as tf
 from tensorflow.python.ops import control_flow_ops
 from object_detection.core import box_list
 from object_detection.core import box_list_ops
+from object_detection.core import densepose_ops
 from object_detection.core import keypoint_ops
 from object_detection.core import preprocessor_cache
 from object_detection.core import standard_fields as fields
@@ -568,6 +569,8 @@ def random_horizontal_flip(image,
                            masks=None,
                            keypoints=None,
                            keypoint_visibilities=None,
+                           densepose_part_ids=None,
+                           densepose_surface_coords=None,
                            keypoint_flip_permutation=None,
                            probability=0.5,
                            seed=None,
@@ -589,6 +592,16 @@ def random_horizontal_flip(image,
       normalized coordinates.
     keypoint_visibilities: (optional) rank 2 bool tensor with shape
       [num_instances, num_keypoints].
+    densepose_part_ids: (optional) rank 2 int32 tensor with shape
+      [num_instances, num_points] holding the part id for each
+      sampled point. These part_ids are 0-indexed, where the
+      first non-background part has index 0.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_points, 4]. The DensePose
+      coordinates are of the form (y, x, v, u) where
+      (y, x) are the normalized image coordinates for a
+      sampled point, and (v, u) is the surface
+      coordinate for the part.
     keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
       permutation.
     probability: the probability of performing this augmentation.
@@ -601,9 +614,9 @@ def random_horizontal_flip(image,
   Returns:
     image: image which is the same shape as input image.

-    If boxes, masks, keypoints, keypoint_visibilities, and
-    keypoint_flip_permutation are not None, the function also returns the
-    following tensors.
+    If boxes, masks, keypoints, keypoint_visibilities,
+    keypoint_flip_permutation, densepose_part_ids, or densepose_surface_coords
+    are not None, the function also returns the following tensors.

     boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
            Boxes are in normalized form meaning their coordinates vary
@@ -614,9 +627,15 @@ def random_horizontal_flip(image,
                [num_instances, num_keypoints, 2]
     keypoint_visibilities: rank 2 bool tensor with shape
                            [num_instances, num_keypoints].
+    densepose_part_ids: rank 2 int32 tensor with shape
+                        [num_instances, num_points].
+    densepose_surface_coords: rank 3 float32 tensor with shape
+                              [num_instances, num_points, 4].

   Raises:
     ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+    ValueError: if only one of densepose_part_ids and densepose_surface_coords
+      is provided (the two must be provided together).
   """

   def _flip_image(image):
@@ -628,6 +647,11 @@ def random_horizontal_flip(image,
     raise ValueError(
         'keypoints are provided but keypoints_flip_permutation is not provided')

+  if ((densepose_part_ids is not None and densepose_surface_coords is None) or
+      (densepose_part_ids is None and densepose_surface_coords is not None)):
+    raise ValueError(
+        'Must provide both `densepose_part_ids` and `densepose_surface_coords`')
+
   with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
     result = []
     # random variable defining whether to do flip or not
@@ -666,7 +690,6 @@ def random_horizontal_flip(image,
     # flip keypoint visibilities
     if (keypoint_visibilities is not None and
         keypoint_flip_permutation is not None):
-      permutation = keypoint_flip_permutation
       kpt_flip_perm = keypoint_flip_permutation
       keypoint_visibilities = tf.cond(
           do_a_flip_random,
@@ -674,6 +697,17 @@ def random_horizontal_flip(image,
           lambda: keypoint_visibilities)
       result.append(keypoint_visibilities)

+    # flip DensePose parts and coordinates
+    if densepose_part_ids is not None:
+      flip_densepose_fn = functools.partial(
+          densepose_ops.flip_horizontal, densepose_part_ids,
+          densepose_surface_coords)
+      densepose_tensors = tf.cond(
+          do_a_flip_random,
+          flip_densepose_fn,
+          lambda: (densepose_part_ids, densepose_surface_coords))
+      result.extend(densepose_tensors)
+
     return tuple(result)
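random_horizontal_flip defers the actual transformation to densepose_ops.flip_horizontal. A hedged NumPy sketch of the behavior implied by test_horizontal_flip in the new densepose_ops_test.py above: part ids pass through a left/right permutation (the test checks 4->5 and 8->9, with 0 and 1 fixed) and the normalized x coordinate is mirrored. The permutation table below is hypothetical beyond those checked indices, and the real op evidently also remaps the (v, u) surface coordinates through a per-part symmetry, which is why the double-flip assertion in that test tolerates 1e-2 error:

import numpy as np

# Hypothetical left/right part permutation; only entries 0, 1, 4, 5, 8, 9
# are constrained by the unit test.
PART_PERMUTATION = np.array([0, 1, 2, 3, 5, 4, 6, 7, 9, 8])

def flip_horizontal_sketch(part_ids, surface_coords):
  flipped_ids = PART_PERMUTATION[part_ids]                # swap left/right parts
  flipped_coords = surface_coords.copy()
  flipped_coords[..., 1] = 1.0 - flipped_coords[..., 1]   # mirror normalized x
  return flipped_ids, flipped_coords

ids = np.array([[1, 4], [0, 8]])
coords = np.array([[[0.1, 0.7, 0.2, 0.4], [0.3, 0.8, 0.2, 0.4]],
                   [[0.0, 0.5, 0.8, 0.7], [0.6, 1.0, 0.7, 0.9]]])
new_ids, _ = flip_horizontal_sketch(ids, coords)
# new_ids == [[1, 5], [0, 9]], as the test expects.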
@@ -1285,6 +1319,9 @@ def _strict_random_crop_image(image,
                               masks=None,
                               keypoints=None,
                               keypoint_visibilities=None,
+                              densepose_num_points=None,
+                              densepose_part_ids=None,
+                              densepose_surface_coords=None,
                               min_object_covered=1.0,
                               aspect_ratio_range=(0.75, 1.33),
                               area_range=(0.1, 1.0),
@@ -1322,6 +1359,19 @@ def _strict_random_crop_image(image,
       normalized coordinates.
     keypoint_visibilities: (optional) rank 2 bool tensor with shape
       [num_instances, num_keypoints].
+    densepose_num_points: (optional) rank 1 int32 tensor with shape
+      [num_instances] with the number of sampled points per instance.
+    densepose_part_ids: (optional) rank 2 int32 tensor with shape
+      [num_instances, num_points] holding the part id for each sampled point.
+      These part_ids are 0-indexed, where the first non-background part has
+      index 0.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_points, 4]. The DensePose coordinates are of the form
+      (y, x, v, u) where (y, x) are the normalized image coordinates for a
+      sampled point, and (v, u) is the surface coordinate for the part.
     min_object_covered: the cropped image must cover at least this fraction of
       at least one of the input bounding boxes.
     aspect_ratio_range: allowed range for aspect ratio of cropped image.
@@ -1341,8 +1391,9 @@ def _strict_random_crop_image(image,
            Boxes are in normalized form.
     labels: new labels.

-    If label_weights, multiclass_scores, masks, keypoints, or
-    keypoint_visibilities is not None, the function also returns:
+    If label_weights, multiclass_scores, masks, keypoints,
+    keypoint_visibilities, densepose_num_points, densepose_part_ids, or
+    densepose_surface_coords is not None, the function also returns:
     label_weights: rank 1 float32 tensor with shape [num_instances].
     multiclass_scores: rank 2 float32 tensor with shape
                        [num_instances, num_classes]
@@ -1351,9 +1402,24 @@ def _strict_random_crop_image(image,
     keypoints: rank 3 float32 tensor with shape
                [num_instances, num_keypoints, 2]
     keypoint_visibilities: rank 2 bool tensor with shape
                            [num_instances, num_keypoints]
+    densepose_num_points: rank 1 int32 tensor with shape [num_instances].
+    densepose_part_ids: rank 2 int32 tensor with shape
+                        [num_instances, num_points].
+    densepose_surface_coords: rank 3 float32 tensor with shape
+                              [num_instances, num_points, 4].
+
+  Raises:
+    ValueError: If some but not all of the DensePose tensors are provided.
   """
   with tf.name_scope('RandomCropImage', values=[image, boxes]):
+    densepose_tensors = [densepose_num_points, densepose_part_ids,
+                         densepose_surface_coords]
+    if (any(t is not None for t in densepose_tensors) and
+        not all(t is not None for t in densepose_tensors)):
+      raise ValueError('If cropping DensePose labels, must provide '
+                       '`densepose_num_points`, `densepose_part_ids`, and '
+                       '`densepose_surface_coords`')
     image_shape = tf.shape(image)

     # boxes are [N, 4]. Lets first make them [N, 1, 4].
@@ -1464,6 +1530,23 @@ def _strict_random_crop_image(image,
           new_keypoints, kpt_vis_of_boxes_completely_inside_window)
       result.append(new_kpt_visibilities)

+    if densepose_num_points is not None:
+      filtered_dp_tensors = []
+      for dp_tensor in densepose_tensors:
+        dp_tensor_inside_window = tf.gather(dp_tensor, inside_window_ids)
+        dp_tensor_completely_inside_window = tf.gather(
+            dp_tensor_inside_window, keep_ids)
+        filtered_dp_tensors.append(dp_tensor_completely_inside_window)
+      new_dp_num_points = filtered_dp_tensors[0]
+      new_dp_point_ids = filtered_dp_tensors[1]
+      new_dp_surf_coords = densepose_ops.change_coordinate_frame(
+          filtered_dp_tensors[2], im_box_rank1)
+      if clip_boxes:
+        new_dp_num_points, new_dp_point_ids, new_dp_surf_coords = (
+            densepose_ops.prune_outside_window(
+                new_dp_num_points, new_dp_point_ids, new_dp_surf_coords,
+                window=[0.0, 0.0, 1.0, 1.0]))
+      result.extend([new_dp_num_points, new_dp_point_ids, new_dp_surf_coords])
+
     return tuple(result)
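The DensePose tensors ride along with the same two-stage index filtering used for the boxes: a first tf.gather keeps instances whose boxes overlap the crop window at all (inside_window_ids), and a second keeps only those completely inside it (keep_ids). A standalone toy example of that double gather; the index values here are made up:

import tensorflow.compat.v1 as tf

dp_part_ids = tf.constant([[1, 1], [0, 0], [16, 0]])
inside_window_ids = tf.constant([0, 2])  # instances overlapping the crop
keep_ids = tf.constant([0])              # of those, entirely inside the crop

filtered = tf.gather(tf.gather(dp_part_ids, inside_window_ids), keep_ids)
# filtered == [[1, 1]]: only instance 0's DensePose labels survive.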
@@ -1476,6 +1559,9 @@ def random_crop_image(image,
                       masks=None,
                       keypoints=None,
                       keypoint_visibilities=None,
+                      densepose_num_points=None,
+                      densepose_part_ids=None,
+                      densepose_surface_coords=None,
                       min_object_covered=1.0,
                       aspect_ratio_range=(0.75, 1.33),
                       area_range=(0.1, 1.0),
@@ -1523,6 +1609,19 @@ def random_crop_image(image,
       normalized coordinates.
     keypoint_visibilities: (optional) rank 2 bool tensor with shape
       [num_instances, num_keypoints].
+    densepose_num_points: (optional) rank 1 int32 tensor with shape
+      [num_instances] with the number of sampled points per instance.
+    densepose_part_ids: (optional) rank 2 int32 tensor with shape
+      [num_instances, num_points] holding the part id for each sampled point.
+      These part_ids are 0-indexed, where the first non-background part has
+      index 0.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+      [num_instances, num_points, 4]. The DensePose coordinates are of the form
+      (y, x, v, u) where (y, x) are the normalized image coordinates for a
+      sampled point, and (v, u) is the surface coordinate for the part.
     min_object_covered: the cropped image must cover at least this fraction of
       at least one of the input bounding boxes.
     aspect_ratio_range: allowed range for aspect ratio of cropped image.
@@ -1547,8 +1646,9 @@ def random_crop_image(image,
            form.
     labels: new labels.

-    If label_weights, multiclass_scores, masks, keypoints, keypoint_visibilities
-    is not None, the function also returns:
+    If label_weights, multiclass_scores, masks, keypoints,
+    keypoint_visibilities, densepose_num_points, densepose_part_ids, or
+    densepose_surface_coords is not None, the function also returns:
     label_weights: rank 1 float32 tensor with shape [num_instances].
     multiclass_scores: rank 2 float32 tensor with shape
                        [num_instances, num_classes]
@@ -1557,7 +1657,12 @@ def random_crop_image(image,
     keypoints: rank 3 float32 tensor with shape
                [num_instances, num_keypoints, 2]
     keypoint_visibilities: rank 2 bool tensor with shape
                            [num_instances, num_keypoints]
+    densepose_num_points: rank 1 int32 tensor with shape [num_instances].
+    densepose_part_ids: rank 2 int32 tensor with shape
+                        [num_instances, num_points].
+    densepose_surface_coords: rank 3 float32 tensor with shape
+                              [num_instances, num_points, 4].
   """

   def strict_random_crop_image_fn():
@@ -1571,6 +1676,9 @@ def random_crop_image(image,
         masks=masks,
         keypoints=keypoints,
         keypoint_visibilities=keypoint_visibilities,
+        densepose_num_points=densepose_num_points,
+        densepose_part_ids=densepose_part_ids,
+        densepose_surface_coords=densepose_surface_coords,
         min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
@@ -1602,6 +1710,9 @@ def random_crop_image(image,
       outputs.append(keypoints)
     if keypoint_visibilities is not None:
       outputs.append(keypoint_visibilities)
+    if densepose_num_points is not None:
+      outputs.extend([densepose_num_points, densepose_part_ids,
+                      densepose_surface_coords])

     result = tf.cond(do_a_crop_random, strict_random_crop_image_fn,
                      lambda: tuple(outputs))
@@ -1612,6 +1723,7 @@ def random_pad_image(image,
                      boxes,
                      masks=None,
                      keypoints=None,
+                     densepose_surface_coords=None,
                      min_image_size=None,
                      max_image_size=None,
                      pad_color=None,
@@ -1639,6 +1751,11 @@ def random_pad_image(image,
     keypoints: (optional) rank 3 float32 tensor with shape
                [N, num_keypoints, 2]. The keypoints are in y-x normalized
                coordinates.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+                              [N, num_points, 4]. The DensePose coordinates are
+                              of the form (y, x, v, u) where (y, x) are the
+                              normalized image coordinates for a sampled point,
+                              and (v, u) is the surface coordinate for the part.
     min_image_size: a tensor of size [min_height, min_width], type tf.int32.
                     If passed as None, will be set to image size
                     [height, width].
@@ -1663,6 +1780,9 @@ def random_pad_image(image,
     masks: rank 3 float32 tensor with shape [N, new_height, new_width]
     if keypoints is not None, the function also returns:
     keypoints: rank 3 float32 tensor with shape [N, num_keypoints, 2]
+    if densepose_surface_coords is not None, the function also returns:
+    densepose_surface_coords: rank 3 float32 tensor with shape
+                              [num_instances, num_points, 4]
   """
   if pad_color is None:
     pad_color = tf.reduce_mean(image, axis=[0, 1])
@@ -1754,6 +1874,11 @@ def random_pad_image(image,
     new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
     result.append(new_keypoints)

+  if densepose_surface_coords is not None:
+    new_densepose_surface_coords = densepose_ops.change_coordinate_frame(
+        densepose_surface_coords, new_window)
+    result.append(new_densepose_surface_coords)
+
   return tuple(result)
@@ -1761,6 +1886,7 @@ def random_absolute_pad_image(image,
                               boxes,
                               masks=None,
                               keypoints=None,
+                              densepose_surface_coords=None,
                               max_height_padding=None,
                               max_width_padding=None,
                               pad_color=None,
@@ -1785,6 +1911,11 @@ def random_absolute_pad_image(image,
     keypoints: (optional) rank 3 float32 tensor with shape
                [N, num_keypoints, 2]. The keypoints are in y-x normalized
                coordinates.
+    densepose_surface_coords: (optional) rank 3 float32 tensor with shape
+                              [N, num_points, 4]. The DensePose coordinates are
+                              of the form (y, x, v, u) where (y, x) are the
+                              normalized image coordinates for a sampled point,
+                              and (v, u) is the surface coordinate for the part.
     max_height_padding: a scalar tf.int32 tensor denoting the maximum amount of
                         height padding. The padding will be chosen uniformly at
                         random from [0, max_height_padding).
@@ -1817,6 +1948,7 @@ def random_absolute_pad_image(image,
                           boxes,
                           masks=masks,
                           keypoints=keypoints,
+                          densepose_surface_coords=densepose_surface_coords,
                           min_image_size=min_image_size,
                           max_image_size=max_image_size,
                           pad_color=pad_color,
@@ -3852,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
   Args:
     image: rank 3 float32 tensor containing 1 image ->
-           [height, width,channels].
+           [height, width, channels].
     boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
            Boxes are in normalized form meaning their coordinates vary
            between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
@@ -3996,12 +4128,138 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
   return return_values


+def random_scale_crop_and_pad_to_square(
+    image,
+    boxes,
+    labels,
+    label_weights,
+    masks=None,
+    keypoints=None,
+    scale_min=0.1,
+    scale_max=2.0,
+    output_size=512,
+    resize_method=tf.image.ResizeMethod.BILINEAR,
+    seed=None):
+  """Randomly scale, crop, and then pad an image to fixed square dimensions.
+
+  Randomly scale, crop, and then pad an image to the desired square output
+  dimensions. Specifically, this method first samples a random_scale factor
+  from a uniform distribution between scale_min and scale_max, and then resizes
+  the image such that its maximum dimension is (output_size * random_scale).
+  Secondly, a square output_size crop is extracted from the resized image
+  (note, this will only occur when random_scale > 1.0). Lastly, the cropped
+  region is padded to the desired square output_size, by filling with zeros.
+  The augmentation is borrowed from [1].
+
+  [1]: https://arxiv.org/abs/1911.09070
+
+  Args:
+    image: rank 3 float32 tensor containing 1 image ->
+      [height, width, channels].
+    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
+      are in normalized form meaning their coordinates vary between [0, 1].
+      Each row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop
+      boundary are clipped to the boundary and boxes falling outside the crop
+      are ignored.
+    labels: rank 1 int32 tensor containing the object classes.
+    label_weights: float32 tensor of shape [num_instances] representing the
+      weight for each box.
+    masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
+      width] containing instance masks. The masks are of the same height, width
+      as the input `image`.
+    keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
+      num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
+    scale_min: float, the minimum value for the random scale factor.
+    scale_max: float, the maximum value for the random scale factor.
+    output_size: int, the desired (square) output image size.
+    resize_method: tf.image.ResizeMethod, resize method to use when scaling the
+      input images.
+    seed: random seed.
+
+  Returns:
+    image: image which is the same rank as input image.
+    boxes: boxes which is the same rank as input boxes.
+      Boxes are in normalized form.
+    labels: new labels.
+    label_weights: rank 1 float32 tensor with shape [num_instances].
+    masks: rank 3 float32 tensor with shape [num_instances, height, width]
+      containing instance masks.
+  """
+  img_shape = tf.shape(image)
+  input_height, input_width = img_shape[0], img_shape[1]
+  random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)
+
+  # Compute the scaled height and width from the random scale.
+  max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
+  input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
+  input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
+  scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
+  scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)
+
+  # Compute the offsets:
+  offset_y = tf.cast(scaled_height - output_size, tf.float32)
+  offset_x = tf.cast(scaled_width - output_size, tf.float32)
+  offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
+  offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
+  offset_y = tf.cast(offset_y, tf.int32)
+  offset_x = tf.cast(offset_x, tf.int32)
+
+  # Scale, crop, and pad the input image.
+  scaled_image = tf.image.resize_images(
+      image, [scaled_height, scaled_width], method=resize_method)
+  scaled_image = scaled_image[offset_y:offset_y + output_size,
+                              offset_x:offset_x + output_size, :]
+  output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
+                                              output_size, output_size)
+
+  # Update the boxes.
+  new_window = tf.cast(
+      tf.stack([offset_y, offset_x,
+                offset_y + output_size, offset_x + output_size]),
+      dtype=tf.float32)
+  new_window /= tf.cast(
+      tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
+      dtype=tf.float32)
+  boxlist = box_list.BoxList(boxes)
+  boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
+  boxlist, indices = box_list_ops.prune_completely_outside_window(
+      boxlist, [0.0, 0.0, 1.0, 1.0])
+  boxlist = box_list_ops.clip_to_window(
+      boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)
+
+  return_values = [output_image, boxlist.get(),
+                   tf.gather(labels, indices),
+                   tf.gather(label_weights, indices)]
+
+  if masks is not None:
+    new_masks = tf.expand_dims(masks, -1)
+    new_masks = tf.image.resize_images(
+        new_masks, [scaled_height, scaled_width], method=resize_method)
+    new_masks = new_masks[:, offset_y:offset_y + output_size,
+                          offset_x:offset_x + output_size, :]
+    new_masks = tf.image.pad_to_bounding_box(
+        new_masks, 0, 0, output_size, output_size)
+    new_masks = tf.squeeze(new_masks, [-1])
+    return_values.append(tf.gather(new_masks, indices))
+
+  if keypoints is not None:
+    keypoints = tf.gather(keypoints, indices)
+    keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
+    keypoints = keypoint_ops.prune_outside_window(
+        keypoints, [0.0, 0.0, 1.0, 1.0])
+    return_values.append(keypoints)
+
+  return return_values
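To make the scaled-dimension arithmetic concrete, a short hedged walk-through in plain Python, using the 512x256 input and output_size=512 that the new unit test in preprocessor_test.py (further below) exercises:

# random_scale is sampled in [scale_min, scale_max]; take 2.0 as an example.
input_height, input_width, output_size = 512, 256, 512
random_scale = 2.0

max_input_dim = float(max(input_height, input_width))        # 512.0
input_ar_y = input_height / max_input_dim                    # 1.0
input_ar_x = input_width / max_input_dim                     # 0.5
scaled_height = int(random_scale * output_size * input_ar_y)  # 1024
scaled_width = int(random_scale * output_size * input_ar_x)   # 512

# A crop offset is then drawn in [0, scaled_dim - output_size] per axis
# (here up to 512 in y and 0 in x), and a normalized box of height 0.1 ends
# up with height 0.1 * scaled_height / output_size in the output frame,
# which is exactly the effective_scale check the test performs.
print(scaled_height, scaled_width)  # 1024 512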
 def get_default_func_arg_map(include_label_weights=True,
                              include_label_confidences=False,
                              include_multiclass_scores=False,
                              include_instance_masks=False,
                              include_keypoints=False,
-                             include_keypoint_visibilities=False):
+                             include_keypoint_visibilities=False,
+                             include_dense_pose=False):
   """Returns the default mapping from a preprocessor function to its args.

   Args:
@@ -4017,6 +4275,8 @@ def get_default_func_arg_map(include_label_weights=True,
       keypoints, too.
     include_keypoint_visibilities: If True, preprocessing functions will modify
       the keypoint visibilities, too.
+    include_dense_pose: If True, preprocessing functions will modify the
+      DensePose labels, too.

   Returns:
     A map from preprocessing functions to the arguments they receive.
@@ -4049,6 +4309,17 @@ def get_default_func_arg_map(include_label_weights=True,
     groundtruth_keypoint_visibilities = (
         fields.InputDataFields.groundtruth_keypoint_visibilities)
+  groundtruth_dp_num_points = None
+  groundtruth_dp_part_ids = None
+  groundtruth_dp_surface_coords = None
+  if include_dense_pose:
+    groundtruth_dp_num_points = (
+        fields.InputDataFields.groundtruth_dp_num_points)
+    groundtruth_dp_part_ids = (
+        fields.InputDataFields.groundtruth_dp_part_ids)
+    groundtruth_dp_surface_coords = (
+        fields.InputDataFields.groundtruth_dp_surface_coords)
   prep_func_arg_map = {
       normalize_image: (fields.InputDataFields.image,),
       random_horizontal_flip: (
@@ -4057,6 +4328,8 @@ def get_default_func_arg_map(include_label_weights=True,
           groundtruth_instance_masks,
           groundtruth_keypoints,
           groundtruth_keypoint_visibilities,
+          groundtruth_dp_part_ids,
+          groundtruth_dp_surface_coords,
       ),
       random_vertical_flip: (
           fields.InputDataFields.image,
@@ -4082,21 +4355,22 @@ def get_default_func_arg_map(include_label_weights=True,
       random_adjust_saturation: (fields.InputDataFields.image,),
       random_distort_color: (fields.InputDataFields.image,),
       random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
-      random_crop_image: (fields.InputDataFields.image,
-                          fields.InputDataFields.groundtruth_boxes,
-                          fields.InputDataFields.groundtruth_classes,
-                          groundtruth_label_weights,
-                          groundtruth_label_confidences,
-                          multiclass_scores,
-                          groundtruth_instance_masks,
-                          groundtruth_keypoints,
-                          groundtruth_keypoint_visibilities),
+      random_crop_image: (fields.InputDataFields.image,
+                          fields.InputDataFields.groundtruth_boxes,
+                          fields.InputDataFields.groundtruth_classes,
+                          groundtruth_label_weights,
+                          groundtruth_label_confidences,
+                          multiclass_scores,
+                          groundtruth_instance_masks,
+                          groundtruth_keypoints,
+                          groundtruth_keypoint_visibilities,
+                          groundtruth_dp_num_points,
+                          groundtruth_dp_part_ids,
+                          groundtruth_dp_surface_coords),
       random_pad_image: (fields.InputDataFields.image,
                          fields.InputDataFields.groundtruth_boxes,
                          groundtruth_instance_masks,
-                         groundtruth_keypoints),
+                         groundtruth_keypoints,
+                         groundtruth_dp_surface_coords),
       random_absolute_pad_image: (fields.InputDataFields.image,
                                   fields.InputDataFields.groundtruth_boxes,
                                   groundtruth_instance_masks,
-                                  groundtruth_keypoints),
+                                  groundtruth_keypoints,
+                                  groundtruth_dp_surface_coords),
       random_crop_pad_image: (fields.InputDataFields.image,
                               fields.InputDataFields.groundtruth_boxes,
                               fields.InputDataFields.groundtruth_classes,
@@ -4211,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
           fields.InputDataFields.groundtruth_classes,
           groundtruth_label_weights,
           groundtruth_instance_masks,
           groundtruth_keypoints),
+      random_scale_crop_and_pad_to_square: (
+          fields.InputDataFields.image,
+          fields.InputDataFields.groundtruth_boxes,
+          fields.InputDataFields.groundtruth_classes,
+          groundtruth_label_weights,
+          groundtruth_instance_masks,
+          groundtruth_keypoints),
   }

   return prep_func_arg_map
research/object_detection/core/preprocessor_test.py
@@ -119,6 +119,24 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     ])
     return tf.constant(keypoints, dtype=tf.float32)

+  def createTestDensePose(self):
+    dp_num_points = tf.constant([1, 3], dtype=tf.int32)
+    dp_part_ids = tf.constant(
+        [[4, 0, 0],
+         [1, 0, 5]], dtype=tf.int32)
+    dp_surface_coords = tf.constant(
+        [
+            # Instance 0.
+            [[0.1, 0.2, 0.6, 0.7],
+             [0.0, 0.0, 0.0, 0.0],
+             [0.0, 0.0, 0.0, 0.0]],
+            # Instance 1.
+            [[0.8, 0.9, 0.2, 0.4],
+             [0.1, 0.3, 0.2, 0.8],
+             [0.6, 1.0, 0.3, 0.4]],
+        ], dtype=tf.float32)
+    return dp_num_points, dp_part_ids, dp_surface_coords
+
   def createKeypointFlipPermutation(self):
     return [0, 2, 1]
@@ -694,51 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
         test_masks=True,
         test_keypoints=True)

-  def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
-    def graph_fn():
-      preprocess_options = [(preprocessor.random_horizontal_flip, {})]
-      image_height = 3
-      image_width = 3
-      images = tf.random_uniform([1, image_height, image_width, 3])
-      boxes = self.createTestBoxes()
-      masks = self.createTestMasks()
-      keypoints, keypoint_visibilities = self.createTestKeypoints()
-      keypoint_flip_permutation = self.createKeypointFlipPermutation()
-      tensor_dict = {
-          fields.InputDataFields.image: images,
-          fields.InputDataFields.groundtruth_boxes: boxes,
-          fields.InputDataFields.groundtruth_instance_masks: masks,
-          fields.InputDataFields.groundtruth_keypoints: keypoints,
-          fields.InputDataFields.groundtruth_keypoint_visibilities:
-              keypoint_visibilities
-      }
-      preprocess_options = [(preprocessor.random_horizontal_flip, {
-          'keypoint_flip_permutation': keypoint_flip_permutation})]
-      preprocessor_arg_map = preprocessor.get_default_func_arg_map(
-          include_instance_masks=True,
-          include_keypoints=True,
-          include_keypoint_visibilities=True)
-      tensor_dict = preprocessor.preprocess(
-          tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
-      boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
-      masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
-      keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
-      keypoint_visibilities = tensor_dict[
-          fields.InputDataFields.groundtruth_keypoint_visibilities]
-      return [boxes, masks, keypoints, keypoint_visibilities]
-
-    boxes, masks, keypoints, keypoint_visibilities = self.execute_cpu(
-        graph_fn, [])
-    self.assertIsNotNone(boxes)
-    self.assertIsNotNone(masks)
-    self.assertIsNotNone(keypoints)
-    self.assertIsNotNone(keypoint_visibilities)
-
   def testRandomVerticalFlip(self):
@@ -1886,6 +1859,65 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     self.assertAllClose(distorted_keypoints_.flatten(),
                         expected_keypoints.flatten())

+  def testRunRandomCropImageWithDensePose(self):
+    def graph_fn():
+      image = self.createColorfulTestImage()
+      boxes = self.createTestBoxes()
+      labels = self.createTestLabels()
+      weights = self.createTestGroundtruthWeights()
+      dp_num_points, dp_part_ids, dp_surface_coords = (
+          self.createTestDensePose())
+      tensor_dict = {
+          fields.InputDataFields.image: image,
+          fields.InputDataFields.groundtruth_boxes: boxes,
+          fields.InputDataFields.groundtruth_classes: labels,
+          fields.InputDataFields.groundtruth_weights: weights,
+          fields.InputDataFields.groundtruth_dp_num_points: dp_num_points,
+          fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
+          fields.InputDataFields.groundtruth_dp_surface_coords:
+              dp_surface_coords
+      }
+      preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+          include_dense_pose=True)
+      preprocessing_options = [(preprocessor.random_crop_image, {})]
+      with mock.patch.object(tf.image, 'sample_distorted_bounding_box'
+                            ) as mock_sample_distorted_bounding_box:
+        mock_sample_distorted_bounding_box.return_value = (
+            tf.constant([6, 40, 0], dtype=tf.int32),
+            tf.constant([134, 340, -1], dtype=tf.int32),
+            tf.constant([[[0.03, 0.1, 0.7, 0.95]]], dtype=tf.float32))
+        distorted_tensor_dict = preprocessor.preprocess(
+            tensor_dict, preprocessing_options,
+            func_arg_map=preprocessor_arg_map)
+        distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+        distorted_dp_num_points = distorted_tensor_dict[
+            fields.InputDataFields.groundtruth_dp_num_points]
+        distorted_dp_part_ids = distorted_tensor_dict[
+            fields.InputDataFields.groundtruth_dp_part_ids]
+        distorted_dp_surface_coords = distorted_tensor_dict[
+            fields.InputDataFields.groundtruth_dp_surface_coords]
+        return [distorted_image, distorted_dp_num_points,
+                distorted_dp_part_ids, distorted_dp_surface_coords]
+    (distorted_image_, distorted_dp_num_points_, distorted_dp_part_ids_,
+     distorted_dp_surface_coords_) = self.execute_cpu(graph_fn, [])
+    expected_dp_num_points = np.array([1, 1])
+    expected_dp_part_ids = np.array([[4], [0]])
+    expected_dp_surface_coords = np.array([
+        [[0.10447761, 0.1176470, 0.6, 0.7]],
+        [[0.10447761, 0.2352941, 0.2, 0.8]],
+    ])
+    self.assertAllEqual(distorted_image_.shape, [1, 134, 340, 3])
+    self.assertAllEqual(distorted_dp_num_points_, expected_dp_num_points)
+    self.assertAllEqual(distorted_dp_part_ids_, expected_dp_part_ids)
+    self.assertAllClose(distorted_dp_surface_coords_,
+                        expected_dp_surface_coords)
+
   def testRunRetainBoxesAboveThreshold(self):
     def graph_fn():
       boxes = self.createTestBoxes()
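The expected surface coordinates in testRunRandomCropImageWithDensePose follow directly from the mocked crop window [0.03, 0.1, 0.7, 0.95] and the change-of-frame formula; a quick check in plain Python:

# (y, x) of each surviving point, re-expressed relative to the crop window.
ymin, xmin, ymax, xmax = 0.03, 0.1, 0.7, 0.95

def to_crop_frame(y, x):
  return (y - ymin) / (ymax - ymin), (x - xmin) / (xmax - xmin)

print(to_crop_frame(0.1, 0.2))  # (0.10447761..., 0.11764705...), instance 0
print(to_crop_frame(0.1, 0.3))  # (0.10447761..., 0.23529411...), instance 1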
@@ -2276,7 +2308,10 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >=
                            (padded_boxes_[:, 3] - padded_boxes_[:, 1])))

-  def testRandomPadImageWithKeypointsAndMasks(self):
+  @parameterized.parameters(
+      {'include_dense_pose': False},
+  )
+  def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
     def graph_fn():
       preprocessing_options = [(preprocessor.normalize_image, {
           'original_minval': 0,
@@ -2290,12 +2325,15 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
       labels = self.createTestLabels()
       masks = self.createTestMasks()
       keypoints, _ = self.createTestKeypoints()
+      _, _, dp_surface_coords = self.createTestDensePose()
       tensor_dict = {
           fields.InputDataFields.image: images,
           fields.InputDataFields.groundtruth_boxes: boxes,
           fields.InputDataFields.groundtruth_classes: labels,
           fields.InputDataFields.groundtruth_instance_masks: masks,
           fields.InputDataFields.groundtruth_keypoints: keypoints,
+          fields.InputDataFields.groundtruth_dp_surface_coords:
+              dp_surface_coords
       }
       tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
       images = tensor_dict[fields.InputDataFields.image]
@@ -2304,7 +2342,8 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
       func_arg_map = preprocessor.get_default_func_arg_map(
           include_instance_masks=True,
           include_keypoints=True,
-          include_keypoint_visibilities=True)
+          include_keypoint_visibilities=True,
+          include_dense_pose=include_dense_pose)
       padded_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options,
                                                    func_arg_map=func_arg_map)
@@ -2323,15 +2362,29 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
       padded_keypoints_shape = tf.shape(padded_keypoints)
       images_shape = tf.shape(images)
       padded_images_shape = tf.shape(padded_images)
-      return [boxes_shape, padded_boxes_shape, padded_masks_shape,
-              keypoints_shape, padded_keypoints_shape, images_shape,
-              padded_images_shape, boxes, padded_boxes, keypoints,
-              padded_keypoints]
-
-    (boxes_shape_, padded_boxes_shape_, padded_masks_shape_, keypoints_shape_,
-     padded_keypoints_shape_, images_shape_, padded_images_shape_, boxes_,
-     padded_boxes_, keypoints_, padded_keypoints_) = self.execute_cpu(
-         graph_fn, [])
+      outputs = [boxes_shape, padded_boxes_shape, padded_masks_shape,
+                 keypoints_shape, padded_keypoints_shape, images_shape,
+                 padded_images_shape, boxes, padded_boxes, keypoints,
+                 padded_keypoints]
+      if include_dense_pose:
+        padded_dp_surface_coords = padded_tensor_dict[
+            fields.InputDataFields.groundtruth_dp_surface_coords]
+        outputs.extend([dp_surface_coords, padded_dp_surface_coords])
+      return outputs
+
+    outputs = self.execute_cpu(graph_fn, [])
+    boxes_shape_ = outputs[0]
+    padded_boxes_shape_ = outputs[1]
+    padded_masks_shape_ = outputs[2]
+    keypoints_shape_ = outputs[3]
+    padded_keypoints_shape_ = outputs[4]
+    images_shape_ = outputs[5]
+    padded_images_shape_ = outputs[6]
+    boxes_ = outputs[7]
+    padded_boxes_ = outputs[8]
+    keypoints_ = outputs[9]
+    padded_keypoints_ = outputs[10]
+
     self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
     self.assertAllEqual(keypoints_shape_, padded_keypoints_shape_)
     self.assertTrue((images_shape_[1] >= padded_images_shape_[1] * 0.5).all)
@@ -2347,6 +2400,11 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
                            padded_keypoints_[1, :, 0] - padded_keypoints_[0, :, 0])))
     self.assertTrue(np.all((keypoints_[1, :, 1] - keypoints_[0, :, 1]) >=
                            (padded_keypoints_[1, :, 1] - padded_keypoints_[0, :, 1])))
+    if include_dense_pose:
+      dp_surface_coords = outputs[11]
+      padded_dp_surface_coords = outputs[12]
+      self.assertAllClose(padded_dp_surface_coords[:, :, 2:],
+                          dp_surface_coords[:, :, 2:])

   def testRandomAbsolutePadImage(self):
     height_padding = 10
@@ -3783,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
     size = max(image.shape)
     self.assertAlmostEqual(scale * 256.0, size)

     self.assertAllClose(image[:, :, 0], masks[0, :, :])

+  @parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
+                                  ('scale_2_0', 2.0))
+  def test_random_scale_crop_and_pad_to_square(self, scale):
+    def graph_fn():
+      image = np.random.randn(512, 256, 1)
+      box_centers = [0.25, 0.5, 0.75]
+      box_size = 0.1
+      box_corners = []
+      box_labels = []
+      box_label_weights = []
+      keypoints = []
+      masks = []
+      for center_y in box_centers:
+        for center_x in box_centers:
+          box_corners.append(
+              [center_y - box_size / 2.0, center_x - box_size / 2.0,
+               center_y + box_size / 2.0, center_x + box_size / 2.0])
+          box_labels.append([1])
+          box_label_weights.append([1.])
+          keypoints.append(
+              [[center_y - box_size / 2.0, center_x - box_size / 2.0],
+               [center_y + box_size / 2.0, center_x + box_size / 2.0]])
+          masks.append(image[:, :, 0].reshape(512, 256))
+
+      image = tf.constant(image)
+      boxes = tf.constant(box_corners)
+      labels = tf.constant(box_labels)
+      label_weights = tf.constant(box_label_weights)
+      keypoints = tf.constant(keypoints)
+      masks = tf.constant(np.stack(masks))
+
+      (new_image, new_boxes, _, _, new_masks,
+       new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
+           image, boxes, labels, label_weights, masks=masks,
+           keypoints=keypoints, scale_min=scale, scale_max=scale,
+           output_size=512)
+      return new_image, new_boxes, new_masks, new_keypoints
+
+    image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])
+
+    # Since random_scale_crop_and_pad_to_square may prune and clip boxes,
+    # we only need to find one of the boxes that was not clipped and check
+    # that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
+    # is equivalent to round(a-b, 7) == 0.
+    any_box_has_correct_size = False
+    effective_scale_y = int(scale * 512) / 512.0
+    effective_scale_x = int(scale * 256) / 512.0
+    expected_size_y = 0.1 * effective_scale_y
+    expected_size_x = 0.1 * effective_scale_x
+    for box in boxes:
+      ymin, xmin, ymax, xmax = box
+      any_box_has_correct_size |= (
+          (round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
+          (round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
+          (round((ymax - ymin) - expected_size_y, 7) == 0.0) and
+          (round((xmax - xmin) - expected_size_x, 7) == 0.0))
+    self.assertTrue(any_box_has_correct_size)
+
+    # Similar to the approach above where we check for at least one box with
+    # the expected dimensions, we check for at least one pair of keypoints
+    # whose distance matches the expected dimensions.
+    any_keypoint_pair_has_correct_dist = False
+    for keypoint_pair in keypoints:
+      ymin, xmin = keypoint_pair[0]
+      ymax, xmax = keypoint_pair[1]
+      any_keypoint_pair_has_correct_dist |= (
+          (round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
+          (round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
+          (round((ymax - ymin) - expected_size_y, 7) == 0.0) and
+          (round((xmax - xmin) - expected_size_x, 7) == 0.0))
+    self.assertTrue(any_keypoint_pair_has_correct_dist)
+
+    self.assertAlmostEqual(512.0, image.shape[0])
+    self.assertAlmostEqual(512.0, image.shape[1])
+
+    self.assertAllClose(image[:, :, 0], masks[0, :, :])
research/object_detection/core/standard_fields.py
@@ -66,6 +66,11 @@ class InputDataFields(object):
     groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
     groundtruth_label_weights: groundtruth label weights.
     groundtruth_weights: groundtruth weight factor for bounding boxes.
+    groundtruth_dp_num_points: The number of DensePose sampled points for each
+      instance.
+    groundtruth_dp_part_ids: Part indices for DensePose points.
+    groundtruth_dp_surface_coords: Image locations and UV coordinates for
+      DensePose points.
     num_groundtruth_boxes: number of groundtruth boxes.
     is_annotated: whether an image has been labeled or not.
     true_image_shapes: true shapes of images in the resized images, as resized
@@ -108,6 +113,9 @@ class InputDataFields(object):
   groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
   groundtruth_label_weights = 'groundtruth_label_weights'
   groundtruth_weights = 'groundtruth_weights'
+  groundtruth_dp_num_points = 'groundtruth_dp_num_points'
+  groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
+  groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
   num_groundtruth_boxes = 'num_groundtruth_boxes'
   is_annotated = 'is_annotated'
   true_image_shape = 'true_image_shape'
@@ -133,6 +141,8 @@ class DetectionResultFields(object):
       for detection boxes in the image including background class.
     detection_classes: detection-level class labels.
     detection_masks: contains a segmentation mask for each detection box.
+    detection_surface_coords: contains DensePose surface coordinates for each
+      box.
     detection_boundaries: contains an object boundary for each detection box.
     detection_keypoints: contains detection keypoints for each detection box.
     detection_keypoint_scores: contains detection keypoint scores.
@@ -153,6 +163,7 @@ class DetectionResultFields(object):
   detection_features = 'detection_features'
   detection_classes = 'detection_classes'
   detection_masks = 'detection_masks'
+  detection_surface_coords = 'detection_surface_coords'
   detection_boundaries = 'detection_boundaries'
   detection_keypoints = 'detection_keypoints'
   detection_keypoint_scores = 'detection_keypoint_scores'
@@ -174,7 +185,11 @@ class BoxListFields(object):
     masks: masks per bounding box.
     boundaries: boundaries per bounding box.
     keypoints: keypoints per bounding box.
+    keypoint_visibilities: keypoint visibilities per bounding box.
     keypoint_heatmaps: keypoint heatmaps per bounding box.
+    densepose_num_points: number of DensePose points per bounding box.
+    densepose_part_ids: DensePose part ids per bounding box.
+    densepose_surface_coords: DensePose surface coordinates per bounding box.
     is_crowd: is_crowd annotation per bounding box.
   """
   boxes = 'boxes'
@@ -188,6 +203,9 @@ class BoxListFields(object):
   keypoints = 'keypoints'
   keypoint_visibilities = 'keypoint_visibilities'
   keypoint_heatmaps = 'keypoint_heatmaps'
+  densepose_num_points = 'densepose_num_points'
+  densepose_part_ids = 'densepose_part_ids'
+  densepose_surface_coords = 'densepose_surface_coords'
   is_crowd = 'is_crowd'
   group_of = 'group_of'
research/object_detection/core/target_assigner.py
...
@@ -45,6 +45,7 @@ from object_detection.box_coders import mean_stddev_box_coder
 from object_detection.core import box_coder
 from object_detection.core import box_list
 from object_detection.core import box_list_ops
+from object_detection.core import densepose_ops
 from object_detection.core import keypoint_ops
 from object_detection.core import matcher as mat
 from object_detection.core import region_similarity_calculator as sim_calc
...
@@ -799,17 +800,15 @@ def get_batch_predictions_from_indices(batch_predictions, indices):
     function.

   Args:
-    batch_predictions: A tensor of shape [batch_size, height, width, 2] for
-      single class offsets and [batch_size, height, width, class, 2] for
-      multiple classes offsets (e.g. keypoint joint offsets) representing the
-      (height, width) or (y_offset, x_offset) predictions over a batch.
-    indices: A tensor of shape [num_instances, 3] for single class offset and
-      [num_instances, 4] for multiple classes offsets representing the indices
-      in the batch to be penalized in a loss function
+    batch_predictions: A tensor of shape [batch_size, height, width, channels]
+      or [batch_size, height, width, class, channels] for class-specific
+      features (e.g. keypoint joint offsets).
+    indices: A tensor of shape [num_instances, 3] for single class features or
+      [num_instances, 4] for multiple classes features.

   Returns:
-    values: A tensor of shape [num_instances, 2] holding the predicted
-      values at the given indices.
+    values: A tensor of shape [num_instances, channels] holding the predicted
+      values at the given indices.
   """
   return tf.gather_nd(batch_predictions, indices)
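A quick sketch (not part of the commit; shapes are made up for the example) of what this function computes: tf.gather_nd simply pulls one channels-vector per index row.

import tensorflow as tf

batch_predictions = tf.reshape(
    tf.range(2 * 4 * 4 * 3, dtype=tf.float32),
    [2, 4, 4, 3])  # [batch_size, height, width, channels]
indices = tf.constant([[0, 1, 2], [1, 3, 0]])  # [num_instances, 3]
values = tf.gather_nd(batch_predictions, indices)  # shape [2, 3]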
...
@@ -1601,6 +1600,17 @@ class CenterNetKeypointTargetAssigner(object):
     return (batch_indices, batch_offsets, batch_weights)


def _resize_masks(masks, height, width, method):
  # Resize segmentation masks to conform to output dimensions. Use TF2
  # image resize because TF1's version is buggy:
  # https://yaqs.corp.google.com/eng/q/4970450458378240
  masks = tf2.image.resize(
      masks[:, :, :, tf.newaxis],
      size=(height, width),
      method=method)
  return masks[:, :, :, 0]
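A minimal usage sketch (not in the commit; it assumes the module's tf/tf2 imports and uses the 'nearest' resize method for illustration) showing that the helper resizes only the spatial dimensions and keeps the instance dimension:

import numpy as np

masks = tf.constant(np.ones((3, 64, 64), dtype=np.float32))  # [instances, h, w]
resized = _resize_masks(masks, 16, 16, method='nearest')
# resized.shape == (3, 16, 16)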
class CenterNetMaskTargetAssigner(object):
  """Wrapper to compute targets for segmentation masks."""
...
@@ -1642,13 +1652,9 @@ class CenterNetMaskTargetAssigner(object):
     segmentation_targets_list = []
     for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
-      # Resize segmentation masks to conform to output dimensions. Use TF2
-      # image resize because TF1's version is buggy:
-      # https://yaqs.corp.google.com/eng/q/4970450458378240
-      gt_masks = tf2.image.resize(
-          gt_masks[:, :, :, tf.newaxis],
-          size=(output_height, output_width),
-          method=mask_resize_method)
+      gt_masks = _resize_masks(gt_masks, output_height, output_width,
+                               mask_resize_method)
+      gt_masks = gt_masks[:, :, :, tf.newaxis]
       gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes])
       # Shape: [h, w, num_classes].
       segmentations_for_image = tf.reduce_max(
...
@@ -1657,3 +1663,235 @@ class CenterNetMaskTargetAssigner(object):
    segmentation_target = tf.stack(segmentation_targets_list, axis=0)
    return segmentation_target


class CenterNetDensePoseTargetAssigner(object):
  """Wrapper to compute targets for DensePose task."""

  def __init__(self, stride, num_parts=24):
    self._stride = stride
    self._num_parts = num_parts

  def assign_part_and_coordinate_targets(self,
                                         height,
                                         width,
                                         gt_dp_num_points_list,
                                         gt_dp_part_ids_list,
                                         gt_dp_surface_coords_list,
                                         gt_weights_list=None):
    """Returns the DensePose part_id and coordinate targets and their indices.

    The returned values are expected to be used with predicted tensors
    of size (batch_size, height//self._stride, width//self._stride, 2). The
    predicted values at the relevant indices can be retrieved with the
    get_batch_predictions_from_indices function.

    Args:
      height: int, height of input to the model. This is used to determine the
        height of the output.
      width: int, width of the input to the model. This is used to determine
        the width of the output.
      gt_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
        [num_boxes] containing the number of DensePose sampled points per box.
      gt_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
        [num_boxes, max_sampled_points] containing the DensePose part ids
        (0-indexed) for each sampled point. Note that there may be padding, as
        boxes may contain a different number of sampled points.
      gt_dp_surface_coords_list: a list of 3-D tf.float32 tensors of shape
        [num_boxes, max_sampled_points, 4] containing the DensePose surface
        coordinates (normalized) for each sampled point. Note that there may
        be padding.
      gt_weights_list: A list of 1-D tensors with shape [num_boxes]
        corresponding to the weight of each groundtruth detection box.

    Returns:
      batch_indices: an integer tensor of shape [num_total_points, 4] holding
        the indices inside the predicted tensor which should be penalized. The
        first column indicates the index along the batch dimension and the
        second and third columns indicate the index along the y and x
        dimensions respectively. The fourth column is the part index.
      batch_part_ids: an int tensor of shape [num_total_points, num_parts]
        holding 1-hot encodings of parts for each sampled point.
      batch_surface_coords: a float tensor of shape [num_total_points, 2]
        holding the expected (v, u) coordinates for each sampled point.
      batch_weights: a float tensor of shape [num_total_points] indicating the
        weight of each prediction.
      Note that num_total_points = batch_size * num_boxes * max_sampled_points.
    """
    if gt_weights_list is None:
      gt_weights_list = [None] * len(gt_dp_num_points_list)

    batch_indices = []
    batch_part_ids = []
    batch_surface_coords = []
    batch_weights = []
    for i, (num_points, part_ids, surface_coords, weights) in enumerate(
        zip(gt_dp_num_points_list, gt_dp_part_ids_list,
            gt_dp_surface_coords_list, gt_weights_list)):
      num_boxes, max_sampled_points = (
          shape_utils.combined_static_and_dynamic_shape(part_ids))
      part_ids_flattened = tf.reshape(part_ids, [-1])
      part_ids_one_hot = tf.one_hot(part_ids_flattened, depth=self._num_parts)
      # Get DensePose coordinates in the output space.
      surface_coords_abs = densepose_ops.to_absolute_coordinates(
          surface_coords, height // self._stride, width // self._stride)
      surface_coords_abs = tf.reshape(surface_coords_abs, [-1, 4])
      # Each tensor has shape [num_boxes * max_sampled_points].
      yabs, xabs, v, u = tf.unstack(surface_coords_abs, axis=-1)

      # Get the indices (in output space) for the DensePose coordinates. Note
      # that if self._stride is larger than 1, this will have the effect of
      # reducing spatial resolution of the groundtruth points.
      indices_y = tf.cast(yabs, tf.int32)
      indices_x = tf.cast(xabs, tf.int32)

      # Assign ones if weights are not provided.
      if weights is None:
        weights = tf.ones(num_boxes, dtype=tf.float32)
      # Create per-point weights.
      weights_per_point = tf.reshape(
          tf.tile(weights[:, tf.newaxis], multiples=[1, max_sampled_points]),
          shape=[-1])
      # Mask out invalid (i.e. padded) DensePose points.
      num_points_tiled = tf.tile(num_points[:, tf.newaxis],
                                 multiples=[1, max_sampled_points])
      range_tiled = tf.tile(tf.range(max_sampled_points)[tf.newaxis, :],
                            multiples=[num_boxes, 1])
      valid_points = tf.math.less(range_tiled, num_points_tiled)
      valid_points = tf.cast(tf.reshape(valid_points, [-1]), dtype=tf.float32)
      weights_per_point = weights_per_point * valid_points

      # Shape of [num_boxes * max_sampled_points] integer tensor filled with
      # current batch index.
      batch_index = i * tf.ones_like(indices_y, dtype=tf.int32)
      batch_indices.append(
          tf.stack([batch_index, indices_y, indices_x, part_ids_flattened],
                   axis=1))
      batch_part_ids.append(part_ids_one_hot)
      batch_surface_coords.append(tf.stack([v, u], axis=1))
      batch_weights.append(weights_per_point)

    batch_indices = tf.concat(batch_indices, axis=0)
    batch_part_ids = tf.concat(batch_part_ids, axis=0)
    batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
    batch_weights = tf.concat(batch_weights, axis=0)
    return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
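The index arithmetic above can be checked by hand. This sketch is illustrative only, mirroring the first case in the target assigner test further down:

import math

height, width, stride = 120, 80, 4
y, x = 0.11, 0.2  # normalized (y, x) of one sampled point
index_y = math.floor(y * (height // stride))  # floor(0.11 * 30) = 3
index_x = math.floor(x * (width // stride))   # floor(0.2 * 20) = 4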
def filter_mask_overlap_min_area(masks):
  """If a pixel belongs to 2 instances, remove it from the larger instance."""

  num_instances = tf.shape(masks)[0]

  def _filter_min_area():
    """Helper function to filter non empty masks."""
    areas = tf.reduce_sum(masks, axis=[1, 2], keepdims=True)
    per_pixel_area = masks * areas
    # Make sure background is ignored in argmin.
    per_pixel_area = (masks * per_pixel_area +
                      (1 - masks) * per_pixel_area.dtype.max)
    min_index = tf.cast(tf.argmin(per_pixel_area, axis=0), tf.int32)

    filtered_masks = (
        tf.range(num_instances)[:, tf.newaxis, tf.newaxis]
        ==
        min_index[tf.newaxis, :, :]
    )

    return tf.cast(filtered_masks, tf.float32) * masks

  return tf.cond(num_instances > 0, _filter_min_area, lambda: masks)


def filter_mask_overlap(masks, method='min_area'):

  if method == 'min_area':
    return filter_mask_overlap_min_area(masks)
  else:
    raise ValueError('Unknown mask overlap filter type - {}'.format(method))
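A worked example of the min-area rule (illustrative, not from the commit; it assumes the module's tf import): the pixel shared by a small and a large instance stays with the smaller one.

import numpy as np

small = np.array([[1, 0], [0, 0]], dtype=np.float32)  # area 1
large = np.array([[1, 1], [1, 1]], dtype=np.float32)  # area 4
filtered = filter_mask_overlap_min_area(
    tf.constant(np.stack([small, large])))
# filtered[0] -> [[1, 0], [0, 0]] (keeps the shared pixel)
# filtered[1] -> [[0, 1], [1, 1]] (loses the shared pixel)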
class CenterNetCornerOffsetTargetAssigner(object):
  """Wrapper to compute corner offsets for boxes using masks."""

  def __init__(self, stride, overlap_resolution='min_area'):
    """Initializes the corner offset target assigner.

    Args:
      stride: int, the stride of the network in output pixels.
      overlap_resolution: string, specifies how we handle overlapping
        instance masks. Currently only 'min_area' is supported which assigns
        overlapping pixels to the instance with the minimum area.
    """
    self._stride = stride
    self._overlap_resolution = overlap_resolution

  def assign_corner_offset_targets(self, gt_boxes_list, gt_masks_list):
    """Computes the corner offset targets and foreground map.

    For each pixel that is part of any object's foreground, this function
    computes the relative offsets to the top-left and bottom-right corners of
    that instance's bounding box. It also returns a foreground map to indicate
    which pixels contain valid corner offsets.

    Args:
      gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
        representing the groundtruth detection bounding boxes for each sample
        in the batch. The coordinates are expected in normalized coordinates.
      gt_masks_list: A list of float tensors with shape [num_boxes,
        input_height, input_width] with values in {0, 1} representing instance
        masks for each object.

    Returns:
      corner_offsets: A float tensor of shape [batch_size, height, width, 4]
        containing, in order, the (y, x) offsets to the top left corner and
        the (y, x) offsets to the bottom right corner for each foreground
        pixel.
      foreground: A float tensor of shape [batch_size, height, width] in which
        each pixel is set to 1 if it is a part of any instance's foreground
        (and thus contains valid corner offsets) and 0 otherwise.
    """
    _, input_height, input_width = (
        shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
    output_height = input_height // self._stride
    output_width = input_width // self._stride
    y_grid, x_grid = tf.meshgrid(
        tf.range(output_height), tf.range(output_width), indexing='ij')
    y_grid, x_grid = tf.cast(y_grid, tf.float32), tf.cast(x_grid, tf.float32)

    corner_targets = []
    foreground_targets = []
    for gt_masks, gt_boxes in zip(gt_masks_list, gt_boxes_list):
      gt_masks = _resize_masks(gt_masks, output_height, output_width,
                               method=ResizeMethod.NEAREST_NEIGHBOR)
      gt_masks = filter_mask_overlap(gt_masks, self._overlap_resolution)

      ymin, xmin, ymax, xmax = tf.unstack(gt_boxes, axis=1)
      ymin, ymax = ymin * output_height, ymax * output_height
      xmin, xmax = xmin * output_width, xmax * output_width

      top_y = ymin[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
      left_x = xmin[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
      bottom_y = ymax[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
      right_x = xmax[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]

      foreground_target = tf.cast(tf.reduce_sum(gt_masks, axis=0) > 0.5,
                                  tf.float32)
      foreground_targets.append(foreground_target)

      corner_target = tf.stack([
          tf.reduce_sum(top_y * gt_masks, axis=0),
          tf.reduce_sum(left_x * gt_masks, axis=0),
          tf.reduce_sum(bottom_y * gt_masks, axis=0),
          tf.reduce_sum(right_x * gt_masks, axis=0),
      ], axis=2)

      corner_targets.append(corner_target)

    return (tf.stack(corner_targets, axis=0),
            tf.stack(foreground_targets, axis=0))
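Worked arithmetic (illustrative) for the offset definition above, matching the single-object test that follows: with stride 1 and normalized box (0., 0., 1., 1.) on a 4x4 output map, a foreground pixel at (y, x) = (1, 1) gets

# (ymin - y, xmin - x, ymax - y, xmax - x) in output pixels:
offsets = (0 - 1, 0 - 1, 4 - 1, 4 - 1)  # == (-1, -1, 3, 3)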
research/object_detection/core/target_assigner_test.py
...
@@ -1906,6 +1906,274 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
        expected_seg_target, segmentation_target)
class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):

  def test_assign_part_and_coordinate_targets(self):
    def graph_fn():
      gt_dp_num_points_list = [
          # Example 0.
          tf.constant([2, 0, 3], dtype=tf.int32),
          # Example 1.
          tf.constant([1, 1], dtype=tf.int32),
      ]
      gt_dp_part_ids_list = [
          # Example 0.
          tf.constant([[1, 6, 0], [0, 0, 0], [0, 2, 3]], dtype=tf.int32),
          # Example 1.
          tf.constant([[7, 0, 0], [0, 0, 0]], dtype=tf.int32),
      ]
      gt_dp_surface_coords_list = [
          # Example 0.
          tf.constant(
              [[[0.11, 0.2, 0.3, 0.4],  # Box 0.
                [0.6, 0.4, 0.1, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.0, 0.0, 0.0, 0.0],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.22, 0.1, 0.6, 0.8],  # Box 2.
                [0.0, 0.4, 0.5, 1.0],
                [0.3, 0.2, 0.4, 0.1]]],
              dtype=tf.float32),
          # Example 1.
          tf.constant(
              [[[0.5, 0.5, 0.3, 1.0],  # Box 0.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.2, 0.2, 0.5, 0.8],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]]],
              dtype=tf.float32),
      ]
      gt_weights_list = [
          # Example 0.
          tf.constant([1.0, 1.0, 0.5], dtype=tf.float32),
          # Example 1.
          tf.constant([0.0, 1.0], dtype=tf.float32),
      ]
      cn_assigner = targetassigner.CenterNetDensePoseTargetAssigner(stride=4)
      batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
          cn_assigner.assign_part_and_coordinate_targets(
              height=120,
              width=80,
              gt_dp_num_points_list=gt_dp_num_points_list,
              gt_dp_part_ids_list=gt_dp_part_ids_list,
              gt_dp_surface_coords_list=gt_dp_surface_coords_list,
              gt_weights_list=gt_weights_list))
      return batch_indices, batch_part_ids, batch_surface_coords, batch_weights

    batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
        self.execute(graph_fn, []))

    expected_batch_indices = np.array([
        # Example 0. e.g.
        # The first set of indices is calculated as follows:
        # floor(0.11*120/4) = 3, floor(0.2*80/4) = 4.
        [0, 3, 4, 1], [0, 18, 8, 6], [0, 0, 0, 0], [0, 0, 0, 0],
        [0, 0, 0, 0], [0, 0, 0, 0], [0, 6, 2, 0], [0, 0, 8, 2],
        [0, 9, 4, 3],
        # Example 1.
        [1, 15, 10, 7], [1, 0, 0, 0], [1, 0, 0, 0], [1, 6, 4, 0],
        [1, 0, 0, 0], [1, 0, 0, 0]
    ], dtype=np.int32)
    expected_batch_part_ids = tf.one_hot(
        [1, 6, 0, 0, 0, 0, 0, 2, 3, 7, 0, 0, 0, 0, 0], depth=24).numpy()
    expected_batch_surface_coords = np.array([
        # Box 0.
        [0.3, 0.4], [0.1, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
        [0.0, 0.0], [0.6, 0.8], [0.5, 1.0], [0.4, 0.1],
        # Box 1.
        [0.3, 1.0], [0.0, 0.0], [0.0, 0.0], [0.5, 0.8], [0.0, 0.0],
        [0.0, 0.0],
    ], np.float32)
    expected_batch_weights = np.array([
        # Box 0.
        1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5,
        # Box 1.
        0.0, 0.0, 0.0, 1.0, 0.0, 0.0
    ], dtype=np.float32)
    self.assertAllEqual(expected_batch_indices, batch_indices)
    self.assertAllEqual(expected_batch_part_ids, batch_part_ids)
    self.assertAllClose(expected_batch_surface_coords, batch_surface_coords)
    self.assertAllClose(expected_batch_weights, batch_weights)
class CornerOffsetTargetAssignerTest(test_case.TestCase):

  def test_filter_overlap_min_area_empty(self):
    """Test that empty masks work on CPU."""
    def graph_fn(masks):
      return targetassigner.filter_mask_overlap_min_area(masks)

    masks = self.execute_cpu(graph_fn,
                             [np.zeros((0, 5, 5), dtype=np.float32)])
    self.assertEqual(masks.shape, (0, 5, 5))

  def test_filter_overlap_min_area(self):
    """Test the object with min. area is selected instead of overlap."""
    def graph_fn(masks):
      return targetassigner.filter_mask_overlap_min_area(masks)

    masks = np.zeros((3, 4, 4), dtype=np.float32)
    masks[0, :2, :2] = 1.0
    masks[1, :3, :3] = 1.0
    masks[2, 3, 3] = 1.0

    masks = self.execute(graph_fn, [masks])
    self.assertAllClose(masks[0],
                        [[1, 1, 0, 0],
                         [1, 1, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(masks[1],
                        [[0, 0, 1, 0],
                         [0, 0, 1, 0],
                         [1, 1, 1, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(masks[2],
                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])

  def test_assign_corner_offset_single_object(self):
    """Test that corner offsets are correct with a single object."""
    assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)

    def graph_fn():
      boxes = [tf.constant([[0., 0., 1., 1.]])]
      mask = np.zeros((1, 4, 4), dtype=np.float32)
      mask[0, 1:3, 1:3] = 1.0
      masks = [tf.constant(mask)]
      return assigner.assign_corner_offset_targets(boxes, masks)

    corner_offsets, foreground = self.execute(graph_fn, [])
    self.assertAllClose(foreground[0],
                        [[0, 0, 0, 0],
                         [0, 1, 1, 0],
                         [0, 1, 1, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 0],
                        [[0, 0, 0, 0],
                         [0, -1, -1, 0],
                         [0, -2, -2, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 1],
                        [[0, 0, 0, 0],
                         [0, -1, -2, 0],
                         [0, -1, -2, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 2],
                        [[0, 0, 0, 0],
                         [0, 3, 3, 0],
                         [0, 2, 2, 0],
                         [0, 0, 0, 0]])
    self.assertAllClose(corner_offsets[0, :, :, 3],
                        [[0, 0, 0, 0],
                         [0, 3, 2, 0],
                         [0, 3, 2, 0],
                         [0, 0, 0, 0]])

  def test_assign_corner_offset_multiple_objects(self):
    """Test corner offsets are correct with multiple objects."""
    assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)

    def graph_fn():
      boxes = [
          tf.constant([[0., 0., 1., 1.], [0., 0., 0., 0.]]),
          tf.constant([[0., 0., .25, .25], [.25, .25, 1., 1.]])
      ]
      mask1 = np.zeros((2, 4, 4), dtype=np.float32)
      mask1[0, 0, 0] = 1.0
      mask1[0, 3, 3] = 1.0
      mask2 = np.zeros((2, 4, 4), dtype=np.float32)
      mask2[0, :2, :2] = 1.0
      mask2[1, 1:, 1:] = 1.0
      masks = [tf.constant(mask1), tf.constant(mask2)]
      return assigner.assign_corner_offset_targets(boxes, masks)

    corner_offsets, foreground = self.execute(graph_fn, [])
    self.assertEqual(corner_offsets.shape, (2, 4, 4, 4))
    self.assertEqual(foreground.shape, (2, 4, 4))

    self.assertAllClose(foreground[0],
                        [[1, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])
    self.assertAllClose(corner_offsets[0, :, :, 0],
                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, -3]])
    self.assertAllClose(corner_offsets[0, :, :, 1],
                        [[0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, -3]])
    self.assertAllClose(corner_offsets[0, :, :, 2],
                        [[4, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])
    self.assertAllClose(corner_offsets[0, :, :, 3],
                        [[4, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 1]])

    self.assertAllClose(foreground[1],
                        [[1, 1, 0, 0],
                         [1, 1, 1, 1],
                         [0, 1, 1, 1],
                         [0, 1, 1, 1]])
    self.assertAllClose(corner_offsets[1, :, :, 0],
                        [[0, 0, 0, 0],
                         [-1, -1, 0, 0],
                         [0, -1, -1, -1],
                         [0, -2, -2, -2]])
    self.assertAllClose(corner_offsets[1, :, :, 1],
                        [[0, -1, 0, 0],
                         [0, -1, -1, -2],
                         [0, 0, -1, -2],
                         [0, 0, -1, -2]])
    self.assertAllClose(corner_offsets[1, :, :, 2],
                        [[1, 1, 0, 0],
                         [0, 0, 3, 3],
                         [0, 2, 2, 2],
                         [0, 1, 1, 1]])
    self.assertAllClose(corner_offsets[1, :, :, 3],
                        [[1, 0, 0, 0],
                         [1, 0, 2, 1],
                         [0, 3, 2, 1],
                         [0, 3, 2, 1]])

  def test_assign_corner_offsets_no_objects(self):
    """Test assignment works with empty input on cpu."""
    assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)

    def graph_fn():
      boxes = [tf.zeros((0, 4), dtype=tf.float32)]
      masks = [tf.zeros((0, 5, 5), dtype=tf.float32)]
      return assigner.assign_corner_offset_targets(boxes, masks)

    corner_offsets, foreground = self.execute_cpu(graph_fn, [])
    self.assertAllClose(corner_offsets, np.zeros((1, 5, 5, 4)))
    self.assertAllClose(foreground, np.zeros((1, 5, 5)))


if __name__ == '__main__':
  tf.enable_v2_behavior()
  tf.test.main()
research/object_detection/data_decoders/tf_example_decoder.py
...
@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
 from object_detection.core import standard_fields as fields
 from object_detection.protos import input_reader_pb2
 from object_detection.utils import label_map_util
+from object_detection.utils import shape_utils

 # pylint: disable=g-import-not-at-top
 try:
...
@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
                num_additional_channels=0,
                load_multiclass_scores=False,
                load_context_features=False,
-               expand_hierarchy_labels=False):
+               expand_hierarchy_labels=False,
+               load_dense_pose=False):
     """Constructor sets keys_to_features and items_to_handlers.

     Args:
...
@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
         account the provided hierarchy in the label_map_proto_file. For
         positive classes, the labels are extended to ancestor. For negative
         classes, the labels are expanded to descendants.
+      load_dense_pose: Whether to load DensePose annotations.

     Raises:
       ValueError: If `instance_mask_type` option is not one of
...
@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
              self._decode_png_instance_masks))
    else:
      raise ValueError('Did not recognize the `instance_mask_type` option.')
    if load_dense_pose:
      self.keys_to_features['image/object/densepose/num'] = (
          tf.VarLenFeature(tf.int64))
      self.keys_to_features['image/object/densepose/part_index'] = (
          tf.VarLenFeature(tf.int64))
      self.keys_to_features['image/object/densepose/x'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/densepose/y'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/densepose/u'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/densepose/v'] = (
          tf.VarLenFeature(tf.float32))
      self.items_to_handlers[
          fields.InputDataFields.groundtruth_dp_num_points] = (
              slim_example_decoder.Tensor('image/object/densepose/num'))
      self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
          slim_example_decoder.ItemHandlerCallback(
              ['image/object/densepose/part_index',
               'image/object/densepose/num'],
              self._dense_pose_part_indices))
      self.items_to_handlers[
          fields.InputDataFields.groundtruth_dp_surface_coords] = (
              slim_example_decoder.ItemHandlerCallback(
                  ['image/object/densepose/x', 'image/object/densepose/y',
                   'image/object/densepose/u', 'image/object/densepose/v',
                   'image/object/densepose/num'],
                  self._dense_pose_surface_coordinates))

    if label_map_proto_file:
      # If the label_map_proto is provided, try to use it in conjunction with
      # the class text, and fall back to a materialized ID.
...
@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
      group_of = fields.InputDataFields.groundtruth_group_of
      tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)

    if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
      tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
          tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
          dtype=tf.int32)
      tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
          tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
          dtype=tf.int32)

    return tensor_dict

  def _reshape_keypoints(self, keys_to_tensors):
...
lambda
:
tf
.
map_fn
(
decode_png_mask
,
png_masks
,
dtype
=
tf
.
float32
),
lambda
:
tf
.
zeros
(
tf
.
cast
(
tf
.
stack
([
0
,
height
,
width
]),
dtype
=
tf
.
int32
)))
def
_dense_pose_part_indices
(
self
,
keys_to_tensors
):
"""Creates a tensor that contains part indices for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 2-D int32 tensor of shape [num_instances, num_points] where each element
contains the DensePose part index (0-23). The value `num_points`
corresponds to the maximum number of sampled points across all instances
in the image. Note that instances with less sampled points will be padded
with zeros in the last dimension.
"""
num_points_per_instances
=
keys_to_tensors
[
'image/object/densepose/num'
]
part_index
=
keys_to_tensors
[
'image/object/densepose/part_index'
]
if
isinstance
(
num_points_per_instances
,
tf
.
SparseTensor
):
num_points_per_instances
=
tf
.
sparse_tensor_to_dense
(
num_points_per_instances
)
if
isinstance
(
part_index
,
tf
.
SparseTensor
):
part_index
=
tf
.
sparse_tensor_to_dense
(
part_index
)
part_index
=
tf
.
cast
(
part_index
,
dtype
=
tf
.
int32
)
max_points_per_instance
=
tf
.
cast
(
tf
.
math
.
reduce_max
(
num_points_per_instances
),
dtype
=
tf
.
int32
)
num_points_cumulative
=
tf
.
concat
([
[
0
],
tf
.
math
.
cumsum
(
num_points_per_instances
)],
axis
=
0
)
def
pad_parts_tensor
(
instance_ind
):
points_range_start
=
num_points_cumulative
[
instance_ind
]
points_range_end
=
num_points_cumulative
[
instance_ind
+
1
]
part_inds
=
part_index
[
points_range_start
:
points_range_end
]
return
shape_utils
.
pad_or_clip_nd
(
part_inds
,
output_shape
=
[
max_points_per_instance
])
return
tf
.
map_fn
(
pad_parts_tensor
,
tf
.
range
(
tf
.
size
(
num_points_per_instances
)),
dtype
=
tf
.
int32
)
def
_dense_pose_surface_coordinates
(
self
,
keys_to_tensors
):
"""Creates a tensor that contains surface coords for each DensePose point.
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float32 tensor of shape [num_instances, num_points, 4] where each
point contains (y, x, v, u) data for each sampled DensePose point. The
(y, x) coordinate has normalized image locations for the point, and (v, u)
contains the surface coordinate (also normalized) for the part. The value
`num_points` corresponds to the maximum number of sampled points across
all instances in the image. Note that instances with less sampled points
will be padded with zeros in dim=1.
"""
num_points_per_instances
=
keys_to_tensors
[
'image/object/densepose/num'
]
dp_y
=
keys_to_tensors
[
'image/object/densepose/y'
]
dp_x
=
keys_to_tensors
[
'image/object/densepose/x'
]
dp_v
=
keys_to_tensors
[
'image/object/densepose/v'
]
dp_u
=
keys_to_tensors
[
'image/object/densepose/u'
]
if
isinstance
(
num_points_per_instances
,
tf
.
SparseTensor
):
num_points_per_instances
=
tf
.
sparse_tensor_to_dense
(
num_points_per_instances
)
if
isinstance
(
dp_y
,
tf
.
SparseTensor
):
dp_y
=
tf
.
sparse_tensor_to_dense
(
dp_y
)
if
isinstance
(
dp_x
,
tf
.
SparseTensor
):
dp_x
=
tf
.
sparse_tensor_to_dense
(
dp_x
)
if
isinstance
(
dp_v
,
tf
.
SparseTensor
):
dp_v
=
tf
.
sparse_tensor_to_dense
(
dp_v
)
if
isinstance
(
dp_u
,
tf
.
SparseTensor
):
dp_u
=
tf
.
sparse_tensor_to_dense
(
dp_u
)
max_points_per_instance
=
tf
.
cast
(
tf
.
math
.
reduce_max
(
num_points_per_instances
),
dtype
=
tf
.
int32
)
num_points_cumulative
=
tf
.
concat
([
[
0
],
tf
.
math
.
cumsum
(
num_points_per_instances
)],
axis
=
0
)
def
pad_surface_coordinates_tensor
(
instance_ind
):
"""Pads DensePose surface coordinates for each instance."""
points_range_start
=
num_points_cumulative
[
instance_ind
]
points_range_end
=
num_points_cumulative
[
instance_ind
+
1
]
y
=
dp_y
[
points_range_start
:
points_range_end
]
x
=
dp_x
[
points_range_start
:
points_range_end
]
v
=
dp_v
[
points_range_start
:
points_range_end
]
u
=
dp_u
[
points_range_start
:
points_range_end
]
# Create [num_points_i, 4] tensor, where num_points_i is the number of
# sampled points for instance i.
unpadded_tensor
=
tf
.
stack
([
y
,
x
,
v
,
u
],
axis
=
1
)
return
shape_utils
.
pad_or_clip_nd
(
unpadded_tensor
,
output_shape
=
[
max_points_per_instance
,
4
])
return
tf
.
map_fn
(
pad_surface_coordinates_tensor
,
tf
.
range
(
tf
.
size
(
num_points_per_instances
)),
dtype
=
tf
.
float32
)
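The flat-to-padded conversion that both handlers implement can be sketched in plain NumPy (illustrative only; the values are taken from the decoder test below):

import numpy as np

num = [0, 4, 2]                    # points per instance
part_index = [2, 2, 3, 4, 2, 9]    # flat list over all instances
starts = np.concatenate([[0], np.cumsum(num)])
padded = np.zeros((len(num), max(num)), dtype=np.int32)
for i in range(len(num)):
  vals = part_index[starts[i]:starts[i + 1]]
  padded[i, :len(vals)] = vals
# padded -> [[0, 0, 0, 0], [2, 2, 3, 4], [2, 9, 0, 0]]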
  def _expand_image_label_hierarchy(self, image_classes, image_confidences):
    """Expand image level labels according to the hierarchy.
...
research/object_detection/data_decoders/tf_example_decoder_test.py
...
@@ -1096,8 +1096,8 @@ class TfExampleDecoderTest(test_case.TestCase):
       return example_decoder.decode(tf.convert_to_tensor(example))

     tensor_dict = self.execute_cpu(graph_fn, [])
-    self.assertTrue(
-        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
+    self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
+                     tensor_dict)

   def testDecodeImageLabels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
...
@@ -1116,8 +1116,7 @@ class TfExampleDecoderTest(test_case.TestCase):
       return example_decoder.decode(tf.convert_to_tensor(example))

     tensor_dict = self.execute_cpu(graph_fn_1, [])
-    self.assertTrue(
-        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+    self.assertIn(fields.InputDataFields.groundtruth_image_classes,
+                  tensor_dict)
     self.assertAllEqual(
         tensor_dict[fields.InputDataFields.groundtruth_image_classes],
         np.array([1, 2]))
...
@@ -1152,8 +1151,7 @@ class TfExampleDecoderTest(test_case.TestCase):
       return example_decoder.decode(tf.convert_to_tensor(example))

     tensor_dict = self.execute_cpu(graph_fn_2, [])
-    self.assertTrue(
-        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+    self.assertIn(fields.InputDataFields.groundtruth_image_classes,
+                  tensor_dict)
     self.assertAllEqual(
         tensor_dict[fields.InputDataFields.groundtruth_image_classes],
         np.array([1, 3]))
...
@@ -1345,6 +1343,93 @@ class TfExampleDecoderTest(test_case.TestCase):
        expected_image_confidence,
        tensor_dict[fields.InputDataFields.groundtruth_image_confidences])

  def testDecodeDensePose(self):
    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
    encoded_jpeg, _ = self._create_encoded_and_decoded_data(
        image_tensor, 'jpeg')
    bbox_ymins = [0.0, 4.0, 2.0]
    bbox_xmins = [1.0, 5.0, 8.0]
    bbox_ymaxs = [2.0, 6.0, 1.0]
    bbox_xmaxs = [3.0, 7.0, 3.3]
    densepose_num = [0, 4, 2]
    densepose_part_index = [2, 2, 3, 4, 2, 9]
    densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
    densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
    densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]

    def graph_fn():
      example = tf.train.Example(
          features=tf.train.Features(
              feature={
                  'image/encoded':
                      dataset_util.bytes_feature(encoded_jpeg),
                  'image/format':
                      dataset_util.bytes_feature(six.b('jpeg')),
                  'image/object/bbox/ymin':
                      dataset_util.float_list_feature(bbox_ymins),
                  'image/object/bbox/xmin':
                      dataset_util.float_list_feature(bbox_xmins),
                  'image/object/bbox/ymax':
                      dataset_util.float_list_feature(bbox_ymaxs),
                  'image/object/bbox/xmax':
                      dataset_util.float_list_feature(bbox_xmaxs),
                  'image/object/densepose/num':
                      dataset_util.int64_list_feature(densepose_num),
                  'image/object/densepose/part_index':
                      dataset_util.int64_list_feature(densepose_part_index),
                  'image/object/densepose/x':
                      dataset_util.float_list_feature(densepose_x),
                  'image/object/densepose/y':
                      dataset_util.float_list_feature(densepose_y),
                  'image/object/densepose/u':
                      dataset_util.float_list_feature(densepose_u),
                  'image/object/densepose/v':
                      dataset_util.float_list_feature(densepose_v),
              })).SerializeToString()

      example_decoder = tf_example_decoder.TfExampleDecoder(
          load_dense_pose=True)
      output = example_decoder.decode(tf.convert_to_tensor(example))
      dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
      dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
      dp_surface_coords = output[
          fields.InputDataFields.groundtruth_dp_surface_coords]
      return dp_num_points, dp_part_ids, dp_surface_coords

    dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
        graph_fn, [])

    expected_dp_num_points = [0, 4, 2]
    expected_dp_part_ids = [
        [0, 0, 0, 0],
        [2, 2, 3, 4],
        [2, 9, 0, 0]
    ]
    expected_dp_surface_coords = np.array(
        [
            # Instance 0 (no points).
            [[0., 0., 0., 0.], [0., 0., 0., 0.],
             [0., 0., 0., 0.], [0., 0., 0., 0.]],
            # Instance 1 (4 points).
            [[0.9, 0.1, 0.99, 0.01], [0.8, 0.2, 0.98, 0.02],
             [0.7, 0.3, 0.97, 0.03], [0.6, 0.4, 0.96, 0.04]],
            # Instance 2 (2 points).
            [[0.5, 0.5, 0.95, 0.05], [0.4, 0.6, 0.94, 0.06],
             [0., 0., 0., 0.], [0., 0., 0., 0.]],
        ], dtype=np.float32)
    self.assertAllEqual(dp_num_points, expected_dp_num_points)
    self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
    self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)


if __name__ == '__main__':
  tf.test.main()
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py
...
@@ -43,70 +43,22 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import argparse
 import copy
 import datetime
 import io
 import itertools
 import json
 import os
-from absl import app
-from absl import flags
-import apache_beam as beam
 import numpy as np
 import PIL.Image
 import six
-import tensorflow as tf
-from apache_beam import runners
-
-flags.DEFINE_string('input_tfrecord', None, 'TFRecord containing images in '
-                    'tf.Example format for object detection, with bounding '
-                    'boxes and contextual feature embeddings.')
-flags.DEFINE_string('output_tfrecord', None,
-                    'TFRecord containing images in tf.Example format, with '
-                    'added contextual memory banks.')
-flags.DEFINE_string('sequence_key', None, 'Key to use when grouping sequences: '
-                    'so far supports `image/seq_id` and `image/location`.')
-flags.DEFINE_string('time_horizon', None, 'What time horizon to use when '
-                    'splitting the data, if any. Options are: `year`, `month`, '
-                    '`week`, `day `, `hour`, `minute`, `None`.')
-flags.DEFINE_integer('subsample_context_features_rate', 0, 'Whether to '
-                     'subsample the context_features, and if so how many to '
-                     'sample. If the rate is set to X, it will sample context '
-                     'from 1 out of every X images. Default is sampling from '
-                     'every image, which is X=0.')
-flags.DEFINE_boolean('reduce_image_size', True, 'downsamples images to '
-                     'have longest side max_image_dimension, maintaining '
-                     'aspect ratio')
-flags.DEFINE_integer('max_image_dimension', 1024, 'sets max image dimension')
-flags.DEFINE_boolean('add_context_features', True, 'adds a memory bank of '
-                     'embeddings to each clip')
-flags.DEFINE_boolean('sorted_image_ids', True, 'whether the image source_ids '
-                     'are sortable to deal with date_captured tie-breaks')
-flags.DEFINE_string('image_ids_to_keep', 'All', 'path to .json list of image '
-                    'ids to keep, used for ground truth eval creation')
-flags.DEFINE_boolean('keep_context_features_image_id_list', False, 'Whether or '
-                     'not to keep a list of the image_ids corresponding to the '
-                     'memory bank')
-flags.DEFINE_boolean('keep_only_positives', False, 'Whether or not to '
-                     'keep only positive boxes based on score')
-flags.DEFINE_boolean('keep_only_positives_gt', False, 'Whether or not to '
-                     'keep only positive boxes based on gt class')
-flags.DEFINE_float('context_features_score_threshold', 0.7, 'What score '
-                   'threshold to use for boxes in context_features')
-flags.DEFINE_integer('max_num_elements_in_context_features', 2000, 'Sets max '
-                     'num elements per memory bank')
-flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
-flags.DEFINE_string('output_type', 'tf_sequence_example', 'Output type, one of '
-                    '`tf_example`, `tf_sequence_example`')
-flags.DEFINE_integer('max_clip_length', None, 'Max length for sequence '
-                     'example outputs.')
-FLAGS = flags.FLAGS
-
-DEFAULT_FEATURE_LENGTH = 2057
+import tensorflow.compat.v1 as tf

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class ReKeyDataFn(beam.DoFn):
...
@@ -406,7 +358,8 @@ class GenerateContextFn(beam.DoFn):
                keep_only_positives_gt=False,
                max_num_elements_in_context_features=5000,
                pad_context_features=False,
-               output_type='tf_example',
-               max_clip_length=None):
+               output_type='tf_example',
+               max_clip_length=None,
+               context_feature_length=2057):
     """Initialization function.

     Args:
...
@@ -432,6 +385,8 @@ class GenerateContextFn(beam.DoFn):
       output_type: What type of output, one of `tf_example`,
         `tf_sequence_example`
       max_clip_length: The maximum length of a sequence example, before
         splitting into multiple
+      context_feature_length: The length of the context feature embeddings
+        stored in the input data.
     """
     self._session = None
     self._num_examples_processed = beam.metrics.Metrics.counter(
...
@@ -456,6 +411,7 @@ class GenerateContextFn(beam.DoFn):
     self._context_features_score_threshold = context_features_score_threshold
     self._max_num_elements_in_context_features = (
         max_num_elements_in_context_features)
+    self._context_feature_length = context_feature_length
     self._images_kept = beam.metrics.Metrics.counter(
         'sequence_data_generation', 'images_kept')
...
@@ -506,9 +462,9 @@ class GenerateContextFn(beam.DoFn):
         context_features_image_id_list.append(example_image_id)

       if not example_embedding:
-        example_embedding.append(np.zeros(DEFAULT_FEATURE_LENGTH))
+        example_embedding.append(np.zeros(self._context_feature_length))

-      feature_length = DEFAULT_FEATURE_LENGTH
+      feature_length = self._context_feature_length

       # If the example_list is not empty and image/embedding_length is in the
       # feature dict, feature_length will be assigned to that. Otherwise, it
       # will
...
@@ -703,7 +659,8 @@ class GenerateContextFn(beam.DoFn):
     return list_of_examples


-def construct_pipeline(input_tfrecord,
+def construct_pipeline(pipeline,
+                       input_tfrecord,
                        output_tfrecord,
                        sequence_key,
                        time_horizon=None,
...
@@ -720,10 +677,12 @@ def construct_pipeline(input_tfrecord,
                        max_num_elements_in_context_features=5000,
                        num_shards=0,
                        output_type='tf_example',
-                       max_clip_length=None):
+                       max_clip_length=None,
+                       context_feature_length=2057):
   """Returns a beam pipeline to run object detection inference.

   Args:
+    pipeline: Initialized beam pipeline.
     input_tfrecord: A TFRecord of tf.train.Example protos containing images.
     output_tfrecord: A TFRecord of tf.train.Example protos that contain images
       in the input TFRecord and the detections from the model.
...
@@ -755,91 +714,224 @@ def construct_pipeline(input_tfrecord,
     output_type: What type of output, one of `tf_example`,
       `tf_sequence_example`
     max_clip_length: The maximum length of a sequence example, before
       splitting into multiple
+    context_feature_length: The length of the context feature embeddings
+      stored in the input data.
   """
-  def pipeline(root):
-    if output_type == 'tf_example':
-      coder = beam.coders.ProtoCoder(tf.train.Example)
-    elif output_type == 'tf_sequence_example':
-      coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
-    else:
-      raise ValueError('Unsupported output type.')
-    input_collection = (
-        root
-        | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
-            input_tfrecord, coder=beam.coders.BytesCoder()))
-    rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
-        ReKeyDataFn(sequence_key, time_horizon,
-                    reduce_image_size, max_image_dimension))
-    grouped_collection = (
-        rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
-    grouped_collection = (
-        grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
-    ordered_collection = (
-        grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
-            SortGroupedDataFn(sequence_key, sorted_image_ids,
-                              max_num_elements_in_context_features)))
-    ordered_collection = (
-        ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
-    output_collection = (
-        ordered_collection | 'AddContextToExamples' >> beam.ParDo(
-            GenerateContextFn(
-                sequence_key, add_context_features, image_ids_to_keep,
-                keep_context_features_image_id_list=(
-                    keep_context_features_image_id_list),
-                subsample_context_features_rate=subsample_context_features_rate,
-                keep_only_positives=keep_only_positives,
-                keep_only_positives_gt=keep_only_positives_gt,
-                context_features_score_threshold=(
-                    context_features_score_threshold),
-                max_num_elements_in_context_features=(
-                    max_num_elements_in_context_features),
-                output_type=output_type,
-                max_clip_length=max_clip_length)))
-    output_collection = (
-        output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
-    _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
-        output_tfrecord,
-        num_shards=num_shards,
-        coder=coder)
-  return pipeline
+  if output_type == 'tf_example':
+    coder = beam.coders.ProtoCoder(tf.train.Example)
+  elif output_type == 'tf_sequence_example':
+    coder = beam.coders.ProtoCoder(tf.train.SequenceExample)
+  else:
+    raise ValueError('Unsupported output type.')
+  input_collection = (
+      pipeline
+      | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
+          input_tfrecord, coder=beam.coders.BytesCoder()))
+  rekey_collection = input_collection | 'RekeyExamples' >> beam.ParDo(
+      ReKeyDataFn(sequence_key, time_horizon,
+                  reduce_image_size, max_image_dimension))
+  grouped_collection = (
+      rekey_collection | 'GroupBySequenceKey' >> beam.GroupByKey())
+  grouped_collection = (
+      grouped_collection | 'ReshuffleGroups' >> beam.Reshuffle())
+  ordered_collection = (
+      grouped_collection | 'OrderByFrameNumber' >> beam.ParDo(
+          SortGroupedDataFn(sequence_key, sorted_image_ids,
+                            max_num_elements_in_context_features)))
+  ordered_collection = (
+      ordered_collection | 'ReshuffleSortedGroups' >> beam.Reshuffle())
+  output_collection = (
+      ordered_collection | 'AddContextToExamples' >> beam.ParDo(
+          GenerateContextFn(
+              sequence_key, add_context_features, image_ids_to_keep,
+              keep_context_features_image_id_list=(
+                  keep_context_features_image_id_list),
+              subsample_context_features_rate=subsample_context_features_rate,
+              keep_only_positives=keep_only_positives,
+              keep_only_positives_gt=keep_only_positives_gt,
+              context_features_score_threshold=(
+                  context_features_score_threshold),
+              max_num_elements_in_context_features=(
+                  max_num_elements_in_context_features),
+              output_type=output_type,
+              max_clip_length=max_clip_length,
+              context_feature_length=context_feature_length)))
+  output_collection = (
+      output_collection | 'ReshuffleExamples' >> beam.Reshuffle())
+  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
+      output_tfrecord,
+      num_shards=num_shards,
+      coder=coder)
+
+
+def parse_args(argv):
+  """Command-line argument parser.
+
+  Args:
+    argv: command line arguments
+  Returns:
+    beam_args: Arguments for the beam pipeline.
+    pipeline_args: Arguments for the pipeline options, such as runner type.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--input_tfrecord',
+      dest='input_tfrecord',
+      required=True,
+      help='TFRecord containing images in tf.Example format for object '
+      'detection, with bounding boxes and contextual feature embeddings.')
+  parser.add_argument(
+      '--output_tfrecord',
+      dest='output_tfrecord',
+      required=True,
+      help='TFRecord containing images in tf.Example format, with added '
+      'contextual memory banks.')
+  parser.add_argument(
+      '--sequence_key',
+      dest='sequence_key',
+      default='image/location',
+      help='Key to use when grouping sequences: so far supports '
+      '`image/seq_id` and `image/location`.')
+  parser.add_argument(
+      '--context_feature_length',
+      dest='context_feature_length',
+      default=2057,
+      help='The length of the context feature embeddings stored in the '
+      'input data.')
+  parser.add_argument(
+      '--time_horizon',
+      dest='time_horizon',
+      default=None,
+      help='What time horizon to use when splitting the data, if any. '
+      'Options are: `year`, `month`, `week`, `day `, `hour`, `minute`, '
+      '`None`.')
+  parser.add_argument(
+      '--subsample_context_features_rate',
+      dest='subsample_context_features_rate',
+      default=0,
+      help='Whether to subsample the context_features, and if so how many '
+      'to sample. If the rate is set to X, it will sample context from 1 '
+      'out of every X images. Default is sampling from every image, which '
+      'is X=0.')
+  parser.add_argument(
+      '--reduce_image_size',
+      dest='reduce_image_size',
+      default=True,
+      help='Downsamples images to have longest side max_image_dimension, '
+      'maintaining aspect ratio.')
+  parser.add_argument(
+      '--max_image_dimension',
+      dest='max_image_dimension',
+      default=1024,
+      help='Sets max image dimension for resizing.')
+  parser.add_argument(
+      '--add_context_features',
+      dest='add_context_features',
+      default=True,
+      help='Adds a memory bank of embeddings to each clip.')
+  parser.add_argument(
+      '--sorted_image_ids',
+      dest='sorted_image_ids',
+      default=True,
+      help='Whether the image source_ids are sortable to deal with '
+      'date_captured tie-breaks.')
+  parser.add_argument(
+      '--image_ids_to_keep',
+      dest='image_ids_to_keep',
+      default='All',
+      help='Path to .json list of image ids to keep, used for ground truth '
+      'eval creation.')
+  parser.add_argument(
+      '--keep_context_features_image_id_list',
+      dest='keep_context_features_image_id_list',
+      default=False,
+      help='Whether or not to keep a list of the image_ids corresponding '
+      'to the memory bank.')
+  parser.add_argument(
+      '--keep_only_positives',
+      dest='keep_only_positives',
+      default=False,
+      help='Whether or not to keep only positive boxes based on score.')
+  parser.add_argument(
+      '--context_features_score_threshold',
+      dest='context_features_score_threshold',
+      default=0.7,
+      help='What score threshold to use for boxes in context_features, '
+      'when `keep_only_positives` is set to `True`.')
+  parser.add_argument(
+      '--keep_only_positives_gt',
+      dest='keep_only_positives_gt',
+      default=False,
+      help='Whether or not to keep only positive boxes based on gt class.')
+  parser.add_argument(
+      '--max_num_elements_in_context_features',
+      dest='max_num_elements_in_context_features',
+      default=2000,
+      help='Sets max number of context feature elements per memory bank. '
+      'If the number of images in the context group is greater than '
+      '`max_num_elements_in_context_features`, the context group will be '
+      'split.')
+  parser.add_argument(
+      '--output_type',
+      dest='output_type',
+      default='tf_example',
+      help='Output type, one of `tf_example`, `tf_sequence_example`.')
+  parser.add_argument(
+      '--max_clip_length',
+      dest='max_clip_length',
+      default=None,
+      help='Max length for sequence example outputs.')
+  parser.add_argument(
+      '--num_shards',
+      dest='num_shards',
+      default=0,
+      help='Number of output shards.')
+  beam_args, pipeline_args = parser.parse_known_args(argv)
+  return beam_args, pipeline_args


-# must create before flags are used
-runner = runners.DirectRunner()
-
-
-def main(_):
-  """Runs the Beam pipeline that builds context features.
-
-  Args:
-    _: unused
-  """
-  dirname = os.path.dirname(FLAGS.output_tfrecord)
-  tf.io.gfile.makedirs(dirname)
-  runner.run(
-      construct_pipeline(FLAGS.input_tfrecord, FLAGS.output_tfrecord,
-                         FLAGS.sequence_key, FLAGS.time_horizon,
-                         FLAGS.subsample_context_features_rate,
-                         FLAGS.reduce_image_size, FLAGS.max_image_dimension,
-                         FLAGS.add_context_features, FLAGS.sorted_image_ids,
-                         FLAGS.image_ids_to_keep,
-                         FLAGS.keep_context_features_image_id_list,
-                         FLAGS.keep_only_positives,
-                         FLAGS.context_features_score_threshold,
-                         FLAGS.keep_only_positives_gt,
-                         FLAGS.max_num_elements_in_context_features,
-                         FLAGS.num_shards, FLAGS.output_type,
-                         FLAGS.max_clip_length))
+def main(argv=None, save_main_session=True):
+  """Runs the Beam pipeline that performs inference.
+
+  Args:
+    argv: Command line arguments.
+    save_main_session: Whether to save the main session.
+  """
+  args, pipeline_args = parse_args(argv)
+
+  pipeline_options = beam.options.pipeline_options.PipelineOptions(
+      pipeline_args)
+  pipeline_options.view_as(
+      beam.options.pipeline_options.SetupOptions).save_main_session = (
+          save_main_session)
+
+  dirname = os.path.dirname(args.output_tfrecord)
+  tf.io.gfile.makedirs(dirname)
+
+  p = beam.Pipeline(options=pipeline_options)
+  construct_pipeline(
+      p,
+      args.input_tfrecord,
+      args.output_tfrecord,
+      args.sequence_key,
+      args.time_horizon,
+      args.subsample_context_features_rate,
+      args.reduce_image_size,
+      args.max_image_dimension,
+      args.add_context_features,
+      args.sorted_image_ids,
+      args.image_ids_to_keep,
+      args.keep_context_features_image_id_list,
+      args.keep_only_positives,
+      args.context_features_score_threshold,
+      args.keep_only_positives_gt,
+      args.max_num_elements_in_context_features,
+      args.output_type,
+      args.max_clip_length,
+      args.context_feature_length)
+  p.run()


 if __name__ == '__main__':
-  flags.mark_flags_as_required(['input_tfrecord', 'output_tfrecord'])
-  app.run(main)
+  main()
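For reference, a hedged sketch of driving the rewritten construct_pipeline programmatically, mirroring the updated tests below (the module must be imported; the paths are placeholders):

import apache_beam as beam

pipeline_options = beam.options.pipeline_options.PipelineOptions(
    runner='DirectRunner')
p = beam.Pipeline(options=pipeline_options)
construct_pipeline(
    p,
    '/tmp/input.tfrecord',   # placeholder input path
    '/tmp/output.tfrecord',  # placeholder output path
    b'image/seq_id')
p.run()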
research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py
...
@@ -22,13 +22,19 @@ import datetime
 import os
 import tempfile
 import unittest
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf

 from object_detection.dataset_tools.context_rcnn import add_context_to_examples
 from object_detection.utils import tf_version
-from apache_beam import runners
+
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 @contextlib.contextmanager
...
@@ -200,7 +206,7 @@ class GenerateContextDataTest(tf.test.TestCase):
         seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])

   def assert_expected_key(self, key):
-    self.assertAllEqual(key, '01')
+    self.assertAllEqual(key, b'01')

   def assert_sorted(self, example_collection):
     example_list = list(example_collection)
...
@@ -329,19 +335,22 @@ class GenerateContextDataTest(tf.test.TestCase):
     with InMemoryTFRecord(
         [self._create_first_tf_example(),
          self._create_second_tf_example()]) as input_tfrecord:
-      runner = runners.DirectRunner()
       temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
       output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
       sequence_key = six.ensure_binary('image/seq_id')
       max_num_elements = 10
       num_shards = 1
-      pipeline = add_context_to_examples.construct_pipeline(
-          input_tfrecord,
-          output_tfrecord,
-          sequence_key,
-          max_num_elements_in_context_features=max_num_elements,
-          num_shards=num_shards)
-      runner.run(pipeline)
+      pipeline_options = beam.options.pipeline_options.PipelineOptions(
+          runner='DirectRunner')
+      p = beam.Pipeline(options=pipeline_options)
+      add_context_to_examples.construct_pipeline(
+          p,
+          input_tfrecord,
+          output_tfrecord,
+          sequence_key,
+          max_num_elements_in_context_features=max_num_elements,
+          num_shards=num_shards)
+      p.run()
       filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
       actual_output = []
       record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
@@ -355,20 +364,23 @@ class GenerateContextDataTest(tf.test.TestCase):
     with InMemoryTFRecord(
         [self._create_first_tf_example(),
          self._create_second_tf_example()]) as input_tfrecord:
-      runner = runners.DirectRunner()
       temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
       output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
       sequence_key = six.ensure_binary('image/seq_id')
       max_num_elements = 10
       num_shards = 1
-      pipeline = add_context_to_examples.construct_pipeline(
-          input_tfrecord,
-          output_tfrecord,
-          sequence_key,
-          max_num_elements_in_context_features=max_num_elements,
-          num_shards=num_shards,
-          output_type='tf_sequence_example')
-      runner.run(pipeline)
+      pipeline_options = beam.options.pipeline_options.PipelineOptions(
+          runner='DirectRunner')
+      p = beam.Pipeline(options=pipeline_options)
+      add_context_to_examples.construct_pipeline(
+          p,
+          input_tfrecord,
+          output_tfrecord,
+          sequence_key,
+          max_num_elements_in_context_features=max_num_elements,
+          num_shards=num_shards,
+          output_type='tf_sequence_example')
+      p.run()
       filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
       actual_output = []
       record_iterator = tf.python_io.tf_record_iterator(
...
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py
...
@@ -33,31 +33,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import argparse
 import hashlib
 import io
 import json
 import logging
 import os
-from absl import app
-from absl import flags
-import apache_beam as beam
 import numpy as np
 import PIL.Image
 import tensorflow.compat.v1 as tf
-from apache_beam import runners
 from object_detection.utils import dataset_util

-flags.DEFINE_string('image_directory', None, 'Directory where images are '
-                    'stored')
-flags.DEFINE_string('output_tfrecord_prefix', None,
-                    'TFRecord containing images in tf.Example format.')
-flags.DEFINE_string('input_annotations_file', None, 'Path to Coco-CameraTraps '
-                    'style annotations file')
-flags.DEFINE_integer('num_images_per_shard', 200,
-                     'The number of images to be stored in each shard.')
-FLAGS = flags.FLAGS
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class ParseImage(beam.DoFn):
...
@@ -243,13 +233,14 @@ class ParseImage(beam.DoFn):
     return [(example)]


-def _load_json_data(data_file):
+def load_json_data(data_file):
   with tf.io.gfile.GFile(data_file, 'r') as fid:
     data_dict = json.load(fid)
   return data_dict


-def create_pipeline(image_directory,
+def create_pipeline(pipeline,
+                    image_directory,
                     input_annotations_file,
                     output_tfrecord_prefix=None,
                     num_images_per_shard=200,
...
@@ -257,68 +248,97 @@ def create_pipeline(image_directory,
   """Creates a beam pipeline for producing a COCO-CameraTraps Image dataset.

   Args:
+    pipeline: Initialized beam pipeline.
     image_directory: Path to image directory
     input_annotations_file: Path to a coco-cameratraps annotation file
     output_tfrecord_prefix: Absolute path for tfrecord outputs. Final files will
       be named {output_tfrecord_prefix}@N.
     num_images_per_shard: The number of images to store in each shard
     keep_bboxes: Whether to keep any bounding boxes that exist in the json file
-
-  Returns:
-    A Beam pipeline.
   """
   logging.info('Reading data from COCO-CameraTraps Dataset.')
-  data = _load_json_data(input_annotations_file)
+  data = load_json_data(input_annotations_file)
   num_shards = int(np.ceil(float(len(data['images'])) / num_images_per_shard))
-  def pipeline(root):
-    """Builds beam pipeline."""
-    image_examples = (
-        root
-        | ('CreateCollections') >> beam.Create(
-            [im['id'] for im in data['images']])
-        | ('ParseImage') >> beam.ParDo(ParseImage(
-            image_directory, data['images'], data['annotations'],
-            data['categories'], keep_bboxes=keep_bboxes)))
-    _ = (image_examples
-         | ('Reshuffle') >> beam.Reshuffle()
-         | ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
-             output_tfrecord_prefix,
-             num_shards=num_shards,
-             coder=beam.coders.ProtoCoder(tf.train.Example)))
-  return pipeline
+  image_examples = (
+      pipeline
+      | ('CreateCollections') >> beam.Create(
+          [im['id'] for im in data['images']])
+      | ('ParseImage') >> beam.ParDo(ParseImage(
+          image_directory, data['images'], data['annotations'],
+          data['categories'], keep_bboxes=keep_bboxes)))
+  _ = (image_examples
+       | ('Reshuffle') >> beam.Reshuffle()
+       | ('WriteTfImageExample') >> beam.io.tfrecordio.WriteToTFRecord(
+           output_tfrecord_prefix,
+           num_shards=num_shards,
+           coder=beam.coders.ProtoCoder(tf.train.Example)))
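The shard count above is plain ceiling division: with num_images_per_shard=200, a dataset of 1,001 images yields ceil(1001 / 200) = 6 shards. A tiny check of that arithmetic (the image counts are illustrative):

    import numpy as np

    num_images_per_shard = 200
    for n_images in (1, 200, 201, 1001):
      num_shards = int(np.ceil(float(n_images) / num_images_per_shard))
      print(n_images, '->', num_shards)  # 1 -> 1, 200 -> 1, 201 -> 2, 1001 -> 6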

-def main(_):
+def parse_args(argv):
+  """Command-line argument parser.
+
+  Args:
+    argv: command line arguments
+  Returns:
+    beam_args: Arguments for the beam pipeline.
+    pipeline_args: Arguments for the pipeline options, such as runner type.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--image_directory',
+      dest='image_directory',
+      required=True,
+      help='Path to the directory where the images are stored.')
+  parser.add_argument(
+      '--output_tfrecord_prefix',
+      dest='output_tfrecord_prefix',
+      required=True,
+      help='Path and prefix to store TFRecords containing images in tf.Example'
+      'format.')
+  parser.add_argument(
+      '--input_annotations_file',
+      dest='input_annotations_file',
+      required=True,
+      help='Path to Coco-CameraTraps style annotations file.')
+  parser.add_argument(
+      '--num_images_per_shard',
+      dest='num_images_per_shard',
+      default=200,
+      help='The number of images to be stored in each output shard.')
+  beam_args, pipeline_args = parser.parse_known_args(argv)
+  return beam_args, pipeline_args


+def main(argv=None, save_main_session=True):
   """Runs the Beam pipeline that performs inference.

   Args:
-    _: unused
+    argv: Command line arguments.
+    save_main_session: Whether to save the main session.
   """
-  # must create before flags are used
-  runner = runners.DirectRunner()
-  dirname = os.path.dirname(FLAGS.output_tfrecord_prefix)
+  args, pipeline_args = parse_args(argv)
+  pipeline_options = beam.options.pipeline_options.PipelineOptions(
+      pipeline_args)
+  pipeline_options.view_as(
+      beam.options.pipeline_options.SetupOptions).save_main_session = (
+          save_main_session)
+  dirname = os.path.dirname(args.output_tfrecord_prefix)
   tf.io.gfile.makedirs(dirname)
-  runner.run(create_pipeline(
-      image_directory=FLAGS.image_directory,
-      input_annotations_file=FLAGS.input_annotations_file,
-      output_tfrecord_prefix=FLAGS.output_tfrecord_prefix,
-      num_images_per_shard=FLAGS.num_images_per_shard))
+  p = beam.Pipeline(options=pipeline_options)
+  create_pipeline(
+      pipeline=p,
+      image_directory=args.image_directory,
+      input_annotations_file=args.input_annotations_file,
+      output_tfrecord_prefix=args.output_tfrecord_prefix,
+      num_images_per_shard=args.num_images_per_shard)
+  p.run()


 if __name__ == '__main__':
-  flags.mark_flags_as_required([
-      'image_directory',
-      'input_annotations_file',
-      'output_tfrecord_prefix'
-  ])
-  app.run(main)
+  main()
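parse_known_args is what lets one command line feed two consumers: arguments the tool defines land in beam_args, while everything unrecognized (e.g. --runner=DataflowRunner) passes through untouched for Beam's PipelineOptions. A small sketch of that split (the argument values are illustrative):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--image_directory', required=True)

    argv = ['--image_directory', '/data/images', '--runner=DirectRunner']
    args, pipeline_args = parser.parse_known_args(argv)
    print(args.image_directory)  # /data/images
    print(pipeline_args)         # ['--runner=DirectRunner'] -> PipelineOptions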
research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py
View file @ 31ca3b97
...
...
@@ -21,13 +21,18 @@ import json
 import os
 import tempfile
 import unittest
 import numpy as np
 from PIL import Image
 import tensorflow.compat.v1 as tf

 from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
 from object_detection.utils import tf_version
-from apache_beam import runners
+
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
...
...
@@ -95,13 +100,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         .int64_list.value, [1])
     self.assertAllEqual(example.features.feature['image/object/class/text']
-                        .bytes_list.value, ['animal'])
+                        .bytes_list.value, [b'animal'])
     self.assertAllClose(example.features.feature['image/class/label']
                         .int64_list.value, [1])
     self.assertAllEqual(example.features.feature['image/class/text']
-                        .bytes_list.value, ['animal'])
+                        .bytes_list.value, [b'animal'])
     # Check other essential attributes.
     self.assertAllEqual(
...
...
@@ -112,7 +117,7 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         [self.IMAGE_WIDTH])
     self.assertAllEqual(
         example.features.feature['image/source_id'].bytes_list.value,
-        ['im_0'])
+        [b'im_0'])
     self.assertTrue(
         example.features.feature['image/encoded'].bytes_list.value)
...
...
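The 'animal' to b'animal' and 'im_0' to b'im_0' changes in these assertions reflect Python 3 semantics: tf.train.BytesList stores bytes, so round-tripped values compare unequal to str. A short demonstration of why the expected values must be bytes literals:

    import tensorflow.compat.v1 as tf

    feature = tf.train.Feature(
        bytes_list=tf.train.BytesList(value=['animal'.encode('utf8')]))
    value = feature.bytes_list.value[0]
    print(value == b'animal')  # True
    print(value == 'animal')   # False under Python 3: bytes != str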
@@ -134,13 +139,13 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         .int64_list.value, [1])
     self.assertAllEqual(example.features.feature['image/object/class/text']
-                        .bytes_list.value, ['animal'])
+                        .bytes_list.value, [b'animal'])
     self.assertAllClose(example.features.feature['image/class/label']
                         .int64_list.value, [1])
     self.assertAllEqual(example.features.feature['image/class/text']
-                        .bytes_list.value, ['animal'])
+                        .bytes_list.value, [b'animal'])
     # Check other essential attributes.
     self.assertAllEqual(
...
...
@@ -151,21 +156,23 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         [self.IMAGE_WIDTH])
     self.assertAllEqual(
         example.features.feature['image/source_id'].bytes_list.value,
-        ['im_0'])
+        [b'im_0'])
     self.assertTrue(
         example.features.feature['image/encoded'].bytes_list.value)

   def test_beam_pipeline(self):
-    runner = runners.DirectRunner()
     num_frames = 1
     temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
     json_path = self._create_json_file(temp_dir, num_frames)
     output_tfrecord = temp_dir + '/output'
     self._write_random_images_to_directory(temp_dir, num_frames)
-    pipeline = create_cococameratraps_tfexample_main.create_pipeline(
-        temp_dir, json_path,
-        output_tfrecord_prefix=output_tfrecord)
-    runner.run(pipeline)
+    pipeline_options = beam.options.pipeline_options.PipelineOptions(
+        runner='DirectRunner')
+    p = beam.Pipeline(options=pipeline_options)
+    create_cococameratraps_tfexample_main.create_pipeline(
+        p, temp_dir, json_path,
+        output_tfrecord_prefix=output_tfrecord)
+    p.run()
     filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
     actual_output = []
     record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
...
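WriteToTFRecord with num_shards=N produces files named prefix-SSSSS-of-NNNNN, which is why these tests glob with '-?????-of-?????'. A small sketch of matching those shard names (the prefix is illustrative):

    import tensorflow.compat.v1 as tf

    output_tfrecord = '/tmp/output'  # illustrative prefix
    # A 2-shard write yields: /tmp/output-00000-of-00002, /tmp/output-00001-of-00002
    filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
    for filename in sorted(filenames):
      print(filename)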
@@ -176,17 +183,19 @@ class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
                         actual_output[0]))

   def test_beam_pipeline_bbox(self):
-    runner = runners.DirectRunner()
     num_frames = 1
     temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
     json_path = self._create_json_file(temp_dir, num_frames,
                                        keep_bboxes=True)
     output_tfrecord = temp_dir + '/output'
     self._write_random_images_to_directory(temp_dir, num_frames)
-    pipeline = create_cococameratraps_tfexample_main.create_pipeline(
-        temp_dir, json_path,
-        output_tfrecord_prefix=output_tfrecord,
-        keep_bboxes=True)
-    runner.run(pipeline)
+    pipeline_options = beam.options.pipeline_options.PipelineOptions(
+        runner='DirectRunner')
+    p = beam.Pipeline(options=pipeline_options)
+    create_cococameratraps_tfexample_main.create_pipeline(
+        p, temp_dir, json_path,
+        output_tfrecord_prefix=output_tfrecord,
+        keep_bboxes=True)
+    p.run()
     filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
     actual_output = []
     record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
...
research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py
View file @ 31ca3b97
...
...
@@ -45,26 +45,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import argparse
 import os
 import threading
-from absl import app
-from absl import flags
-import apache_beam as beam
 import tensorflow.compat.v1 as tf
-from apache_beam import runners
-flags.DEFINE_string('detection_input_tfrecord', None, 'TFRecord containing '
-                    'images in tf.Example format for object detection.')
-flags.DEFINE_string('detection_output_tfrecord', None,
-                    'TFRecord containing detections in tf.Example format.')
-flags.DEFINE_string('detection_model_dir', None, 'Path to directory containing'
-                    'an object detection SavedModel.')
-flags.DEFINE_float('confidence_threshold', 0.9,
-                   'Min confidence to keep bounding boxes')
-flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
-FLAGS = flags.FLAGS
+
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class GenerateDetectionDataFn(beam.DoFn):
...
...
@@ -205,58 +193,103 @@ class GenerateDetectionDataFn(beam.DoFn):
     return [example]


-def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
+def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
                        confidence_threshold, num_shards):
   """Returns a Beam pipeline to run object detection inference.

   Args:
+    pipeline: Initialized beam pipeline.
     input_tfrecord: A TFRecord of tf.train.Example protos containing images.
     output_tfrecord: A TFRecord of tf.train.Example protos that contain images
       in the input TFRecord and the detections from the model.
     model_dir: Path to `saved_model` to use for inference.
     confidence_threshold: Threshold to use when keeping detection results.
     num_shards: The number of output shards.
-
-  Returns:
-    pipeline: A Beam pipeline.
   """
-  def pipeline(root):
-    input_collection = (
-        root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
-            input_tfrecord,
-            coder=beam.coders.BytesCoder()))
-    output_collection = input_collection | 'RunInference' >> beam.ParDo(
-        GenerateDetectionDataFn(model_dir, confidence_threshold))
-    output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
-    _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
-        output_tfrecord,
-        num_shards=num_shards,
-        coder=beam.coders.ProtoCoder(tf.train.Example))
-  return pipeline
+  input_collection = (
+      pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
+          input_tfrecord,
+          coder=beam.coders.BytesCoder()))
+  output_collection = input_collection | 'RunInference' >> beam.ParDo(
+      GenerateDetectionDataFn(model_dir, confidence_threshold))
+  output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
+  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
+      output_tfrecord,
+      num_shards=num_shards,
+      coder=beam.coders.ProtoCoder(tf.train.Example))


-def main(_):
+def parse_args(argv):
+  """Command-line argument parser.
+
+  Args:
+    argv: command line arguments
+  Returns:
+    beam_args: Arguments for the beam pipeline.
+    pipeline_args: Arguments for the pipeline options, such as runner type.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--detection_input_tfrecord',
+      dest='detection_input_tfrecord',
+      required=True,
+      help='TFRecord containing images in tf.Example format for object '
+      'detection.')
+  parser.add_argument(
+      '--detection_output_tfrecord',
+      dest='detection_output_tfrecord',
+      required=True,
+      help='TFRecord containing detections in tf.Example format.')
+  parser.add_argument(
+      '--detection_model_dir',
+      dest='detection_model_dir',
+      required=True,
+      help='Path to directory containing an object detection SavedModel.')
+  parser.add_argument(
+      '--confidence_threshold',
+      dest='confidence_threshold',
+      default=0.9,
+      help='Min confidence to keep bounding boxes.')
+  parser.add_argument(
+      '--num_shards',
+      dest='num_shards',
+      default=0,
+      help='Number of output shards.')
+  beam_args, pipeline_args = parser.parse_known_args(argv)
+  return beam_args, pipeline_args


+def main(argv=None, save_main_session=True):
   """Runs the Beam pipeline that performs inference.

   Args:
-    _: unused
+    argv: Command line arguments.
+    save_main_session: Whether to save the main session.
   """
-  # must create before flags are used
-  runner = runners.DirectRunner()
-  dirname = os.path.dirname(FLAGS.detection_output_tfrecord)
+  args, pipeline_args = parse_args(argv)
+  pipeline_options = beam.options.pipeline_options.PipelineOptions(
+      pipeline_args)
+  pipeline_options.view_as(
+      beam.options.pipeline_options.SetupOptions).save_main_session = (
+          save_main_session)
+  dirname = os.path.dirname(args.detection_output_tfrecord)
   tf.io.gfile.makedirs(dirname)
-  runner.run(construct_pipeline(
-      FLAGS.detection_input_tfrecord,
-      FLAGS.detection_output_tfrecord,
-      FLAGS.detection_model_dir,
-      FLAGS.confidence_threshold,
-      FLAGS.num_shards))
+  p = beam.Pipeline(options=pipeline_options)
+  construct_pipeline(
+      p,
+      args.detection_input_tfrecord,
+      args.detection_output_tfrecord,
+      args.detection_model_dir,
+      args.confidence_threshold,
+      args.num_shards)
+  p.run()


 if __name__ == '__main__':
-  flags.mark_flags_as_required([
-      'detection_input_tfrecord',
-      'detection_output_tfrecord',
-      'detection_model_dir'
-  ])
-  app.run(main)
+  main()
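save_main_session=True matters for distributed runners: DoFns defined at module scope may reference module-level imports, and pickling the main session is how those globals reach remote workers. A minimal sketch of setting it through PipelineOptions (the flag list is illustrative):

    import apache_beam as beam
    from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions

    pipeline_args = ['--runner=DirectRunner']  # illustrative pass-through args
    pipeline_options = PipelineOptions(pipeline_args)
    # Pickle the main session so module-level state is available on workers.
    pipeline_options.view_as(SetupOptions).save_main_session = True
    p = beam.Pipeline(options=pipeline_options)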
research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
View file @ 31ca3b97
...
...
@@ -32,13 +32,17 @@ from object_detection.core import model
 from object_detection.dataset_tools.context_rcnn import generate_detection_data
 from object_detection.protos import pipeline_pb2
 from object_detection.utils import tf_version
-from apache_beam import runners

 if six.PY2:
   import mock  # pylint: disable=g-import-not-at-top
 else:
   mock = unittest.mock

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class FakeModel(model.DetectionModel):
   """A Fake Detection model with expected output nodes from post-processing."""
...
...
@@ -67,6 +71,9 @@ class FakeModel(model.DetectionModel):
   def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
     pass

+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    pass

   def loss(self, prediction_dict, true_image_shapes):
     pass
...
...
@@ -243,16 +250,18 @@ class GenerateDetectionDataTest(tf.test.TestCase):
   def test_beam_pipeline(self):
     with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
-      runner = runners.DirectRunner()
       temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
       output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
       saved_model_path = self._export_saved_model()
       confidence_threshold = 0.8
       num_shards = 1
-      pipeline = generate_detection_data.construct_pipeline(
-          input_tfrecord, output_tfrecord, saved_model_path,
-          confidence_threshold, num_shards)
-      runner.run(pipeline)
+      pipeline_options = beam.options.pipeline_options.PipelineOptions(
+          runner='DirectRunner')
+      p = beam.Pipeline(options=pipeline_options)
+      generate_detection_data.construct_pipeline(
+          p, input_tfrecord, output_tfrecord, saved_model_path,
+          confidence_threshold, num_shards)
+      p.run()
       filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
       actual_output = []
       record_iterator = tf.python_io.tf_record_iterator(path=filenames[0])
...
...
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py
View file @ 31ca3b97
...
...
@@ -34,7 +34,8 @@ python tensorflow_models/object_detection/export_inference_graph.py \
     --input_type tf_example \
     --pipeline_config_path path/to/faster_rcnn_model.config \
     --trained_checkpoint_prefix path/to/model.ckpt \
-    --output_directory path/to/exported_model_directory
+    --output_directory path/to/exported_model_directory \
+    --additional_output_tensor_names detection_features

 python generate_embedding_data.py \
     --alsologtostderr \
...
...
@@ -47,34 +48,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import argparse
 import datetime
 import os
 import threading
-from absl import app
-from absl import flags
-import apache_beam as beam
 import numpy as np
 import six
 import tensorflow.compat.v1 as tf
-from apache_beam import runners
-flags.DEFINE_string('embedding_input_tfrecord', None, 'TFRecord containing'
-                    'images in tf.Example format for object detection.')
-flags.DEFINE_string('embedding_output_tfrecord', None,
-                    'TFRecord containing embeddings in tf.Example format.')
-flags.DEFINE_string('embedding_model_dir', None, 'Path to directory containing'
-                    'an object detection SavedModel with'
-                    'detection_box_classifier_features in the output.')
-flags.DEFINE_integer('top_k_embedding_count', 1,
-                     'The number of top k embeddings to add to the memory bank.')
-flags.DEFINE_integer('bottom_k_embedding_count', 0,
-                     'The number of bottom k embeddings to add to the memory '
-                     'bank.')
-flags.DEFINE_integer('num_shards', 0, 'Number of output shards.')
-FLAGS = flags.FLAGS
+
+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class GenerateEmbeddingDataFn(beam.DoFn):
...
...
@@ -321,12 +307,13 @@ class GenerateEmbeddingDataFn(beam.DoFn):
     return [example]


-def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
+def construct_pipeline(pipeline, input_tfrecord, output_tfrecord, model_dir,
                        top_k_embedding_count, bottom_k_embedding_count,
                        num_shards):
   """Returns a beam pipeline to run object detection inference.

   Args:
+    pipeline: Initialized beam pipeline.
     input_tfrecord: A TFRecord of tf.train.Example protos containing images.
     output_tfrecord: A TFRecord of tf.train.Example protos that contain images
       in the input TFRecord and the detections from the model.
...
...
@@ -335,44 +322,98 @@ def construct_pipeline(input_tfrecord, output_tfrecord, model_dir,
     bottom_k_embedding_count: The number of low-confidence embeddings to store.
     num_shards: The number of output shards.
   """
-  def pipeline(root):
-    input_collection = (
-        root | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
-            input_tfrecord,
-            coder=beam.coders.BytesCoder()))
-    output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
-        GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
-                                bottom_k_embedding_count))
-    output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
-    _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
-        output_tfrecord,
-        num_shards=num_shards,
-        coder=beam.coders.ProtoCoder(tf.train.Example))
-  return pipeline
+  input_collection = (
+      pipeline | 'ReadInputTFRecord' >> beam.io.tfrecordio.ReadFromTFRecord(
+          input_tfrecord,
+          coder=beam.coders.BytesCoder()))
+  output_collection = input_collection | 'ExtractEmbedding' >> beam.ParDo(
+      GenerateEmbeddingDataFn(model_dir, top_k_embedding_count,
+                              bottom_k_embedding_count))
+  output_collection = output_collection | 'Reshuffle' >> beam.Reshuffle()
+  _ = output_collection | 'WritetoDisk' >> beam.io.tfrecordio.WriteToTFRecord(
+      output_tfrecord,
+      num_shards=num_shards,
+      coder=beam.coders.ProtoCoder(tf.train.Example))


-def main(_):
+def parse_args(argv):
+  """Command-line argument parser.
+
+  Args:
+    argv: command line arguments
+  Returns:
+    beam_args: Arguments for the beam pipeline.
+    pipeline_args: Arguments for the pipeline options, such as runner type.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--embedding_input_tfrecord',
+      dest='embedding_input_tfrecord',
+      required=True,
+      help='TFRecord containing images in tf.Example format for object '
+      'detection.')
+  parser.add_argument(
+      '--embedding_output_tfrecord',
+      dest='embedding_output_tfrecord',
+      required=True,
+      help='TFRecord containing embeddings in tf.Example format.')
+  parser.add_argument(
+      '--embedding_model_dir',
+      dest='embedding_model_dir',
+      required=True,
+      help='Path to directory containing an object detection SavedModel with'
+      'detection_box_classifier_features in the output.')
+  parser.add_argument(
+      '--top_k_embedding_count',
+      dest='top_k_embedding_count',
+      default=1,
+      help='The number of top k embeddings to add to the memory bank.')
+  parser.add_argument(
+      '--bottom_k_embedding_count',
+      dest='bottom_k_embedding_count',
+      default=0,
+      help='The number of bottom k embeddings to add to the memory bank.')
+  parser.add_argument(
+      '--num_shards',
+      dest='num_shards',
+      default=0,
+      help='Number of output shards.')
+  beam_args, pipeline_args = parser.parse_known_args(argv)
+  return beam_args, pipeline_args


+def main(argv=None, save_main_session=True):
   """Runs the Beam pipeline that performs inference.

   Args:
-    _: unused
+    argv: Command line arguments.
+    save_main_session: Whether to save the main session.
   """
-  # must create before flags are used
-  runner = runners.DirectRunner()
-  dirname = os.path.dirname(FLAGS.embedding_output_tfrecord)
+  args, pipeline_args = parse_args(argv)
+  pipeline_options = beam.options.pipeline_options.PipelineOptions(
+      pipeline_args)
+  pipeline_options.view_as(
+      beam.options.pipeline_options.SetupOptions).save_main_session = (
+          save_main_session)
+  dirname = os.path.dirname(args.embedding_output_tfrecord)
   tf.io.gfile.makedirs(dirname)
-  runner.run(construct_pipeline(
-      FLAGS.embedding_input_tfrecord,
-      FLAGS.embedding_output_tfrecord,
-      FLAGS.embedding_model_dir,
-      FLAGS.top_k_embedding_count,
-      FLAGS.bottom_k_embedding_count,
-      FLAGS.num_shards))
+  p = beam.Pipeline(options=pipeline_options)
+  construct_pipeline(
+      p,
+      args.embedding_input_tfrecord,
+      args.embedding_output_tfrecord,
+      args.embedding_model_dir,
+      args.top_k_embedding_count,
+      args.bottom_k_embedding_count,
+      args.num_shards)
+  p.run()


 if __name__ == '__main__':
-  flags.mark_flags_as_required([
-      'embedding_input_tfrecord',
-      'embedding_output_tfrecord',
-      'embedding_model_dir'
-  ])
-  app.run(main)
+  main()
research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py
View file @ 31ca3b97
...
...
@@ -30,13 +30,18 @@ from object_detection.core import model
 from object_detection.dataset_tools.context_rcnn import generate_embedding_data
 from object_detection.protos import pipeline_pb2
 from object_detection.utils import tf_version
-from apache_beam import runners

 if six.PY2:
   import mock  # pylint: disable=g-import-not-at-top
 else:
   mock = unittest.mock

+try:
+  import apache_beam as beam  # pylint:disable=g-import-not-at-top
+except ModuleNotFoundError:
+  pass


 class FakeModel(model.DetectionModel):
   """A Fake Detection model with expected output nodes from post-processing."""
...
...
@@ -73,6 +78,9 @@ class FakeModel(model.DetectionModel):
   def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
     pass

+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    pass

   def loss(self, prediction_dict, true_image_shapes):
     pass
...
...
@@ -236,13 +244,13 @@ class GenerateEmbeddingData(tf.test.TestCase):
                         .int64_list.value, [5])
     self.assertAllEqual(example.features.feature['image/object/class/text']
-                        .bytes_list.value, ['hyena'])
+                        .bytes_list.value, [b'hyena'])
     self.assertAllClose(example.features.feature['image/class/label']
                         .int64_list.value, [5])
     self.assertAllEqual(example.features.feature['image/class/text']
-                        .bytes_list.value, ['hyena'])
+                        .bytes_list.value, [b'hyena'])
     # Check other essential attributes.
     self.assertAllEqual(
...
...
@@ -251,7 +259,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
         example.features.feature['image/width'].int64_list.value, [600])
     self.assertAllEqual(
         example.features.feature['image/source_id'].bytes_list.value,
-        ['image_id'])
+        [b'image_id'])
     self.assertTrue(
         example.features.feature['image/encoded'].bytes_list.value)
...
...
@@ -268,7 +276,7 @@ class GenerateEmbeddingData(tf.test.TestCase):
                         .int64_list.value, [5])
     self.assertAllEqual(
         tf.train.Example.FromString(
            generated_example).features.feature['image/object/class/text']
-        .bytes_list.value, ['hyena'])
+        .bytes_list.value, [b'hyena'])
     output = inference_fn.process(generated_example)
     output_example = output[0]
     self.assert_expected_example(output_example)
...
...
@@ -304,24 +312,26 @@ class GenerateEmbeddingData(tf.test.TestCase):
         .feature['image/object/class/label'].int64_list.value, [5])
     self.assertAllEqual(
         tf.train.Example.FromString(generated_example).features
-        .feature['image/object/class/text'].bytes_list.value, ['hyena'])
+        .feature['image/object/class/text'].bytes_list.value, [b'hyena'])
     output = inference_fn.process(generated_example)
     output_example = output[0]
     self.assert_expected_example(output_example, botk=True)

   def test_beam_pipeline(self):
     with InMemoryTFRecord([self._create_tf_example()]) as input_tfrecord:
-      runner = runners.DirectRunner()
       temp_dir = tempfile.mkdtemp(dir=os.environ.get('TEST_TMPDIR'))
       output_tfrecord = os.path.join(temp_dir, 'output_tfrecord')
       saved_model_path = self._export_saved_model()
       top_k_embedding_count = 1
       bottom_k_embedding_count = 0
       num_shards = 1
-      pipeline = generate_embedding_data.construct_pipeline(
-          input_tfrecord, output_tfrecord, saved_model_path,
-          top_k_embedding_count, bottom_k_embedding_count, num_shards)
-      runner.run(pipeline)
+      pipeline_options = beam.options.pipeline_options.PipelineOptions(
+          runner='DirectRunner')
+      p = beam.Pipeline(options=pipeline_options)
+      generate_embedding_data.construct_pipeline(
+          p, input_tfrecord, output_tfrecord, saved_model_path,
+          top_k_embedding_count, bottom_k_embedding_count, num_shards)
+      p.run()
       filenames = tf.io.gfile.glob(output_tfrecord + '-?????-of-?????')
       actual_output = []
...
...
research/object_detection/dataset_tools/create_coco_tf_record.py
View file @ 31ca3b97
...
...
@@ -14,6 +14,9 @@
 # ==============================================================================
 r"""Convert raw COCO dataset to TFRecord for object_detection.

+This tool supports data generation for object detection (boxes, masks),
+keypoint detection, and DensePose.
+
 Please note that this tool creates sharded output files.

 Example usage:
...
...
@@ -63,7 +66,18 @@ tf.flags.DEFINE_string('train_keypoint_annotations_file', '',
                        'Training annotations JSON file.')
 tf.flags.DEFINE_string('val_keypoint_annotations_file', '',
                        'Validation annotations JSON file.')
+# DensePose is only available for coco 2014.
+tf.flags.DEFINE_string('train_densepose_annotations_file', '',
+                       'Training annotations JSON file for DensePose.')
+tf.flags.DEFINE_string('val_densepose_annotations_file', '',
+                       'Validation annotations JSON file for DensePose.')
 tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
+# Whether to only produce images/annotations on person class (for keypoint /
+# densepose task).
+tf.flags.DEFINE_boolean('remove_non_person_annotations', False, 'Whether to '
+                        'remove all annotations for non-person objects.')
+tf.flags.DEFINE_boolean('remove_non_person_images', False, 'Whether to '
+                        'remove all examples that do not contain a person.')

 FLAGS = flags.FLAGS
...
...
@@ -77,13 +91,33 @@ _COCO_KEYPOINT_NAMES = [
     b'left_knee', b'right_knee', b'left_ankle', b'right_ankle'
 ]

+_COCO_PART_NAMES = [
+    b'torso_back', b'torso_front', b'right_hand', b'left_hand', b'left_foot',
+    b'right_foot', b'right_upper_leg_back', b'left_upper_leg_back',
+    b'right_upper_leg_front', b'left_upper_leg_front', b'right_lower_leg_back',
+    b'left_lower_leg_back', b'right_lower_leg_front', b'left_lower_leg_front',
+    b'left_upper_arm_back', b'right_upper_arm_back', b'left_upper_arm_front',
+    b'right_upper_arm_front', b'left_lower_arm_back', b'right_lower_arm_back',
+    b'left_lower_arm_front', b'right_lower_arm_front', b'right_face',
+    b'left_face',
+]
+
+_DP_PART_ID_OFFSET = 1
+
+
+def clip_to_unit(x):
+  return min(max(x, 0.0), 1.0)


 def create_tf_example(image,
                       annotations_list,
                       image_dir,
                       category_index,
                       include_masks=False,
-                      keypoint_annotations_dict=None):
+                      keypoint_annotations_dict=None,
+                      densepose_annotations_dict=None,
+                      remove_non_person_annotations=False,
+                      remove_non_person_images=False):
   """Converts image and annotations to a tf.Example proto.

   Args:
...
...
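Two small details above do real work later: clip_to_unit clamps DensePose point coordinates into [0, 1] before they are scaled into the image frame, and _DP_PART_ID_OFFSET shifts COCO's 1-based dp_I part labels to 0-based indices into _COCO_PART_NAMES. A quick illustration:

    def clip_to_unit(x):
      return min(max(x, 0.0), 1.0)

    _DP_PART_ID_OFFSET = 1

    print(clip_to_unit(-0.2), clip_to_unit(0.5), clip_to_unit(1.7))  # 0.0 0.5 1.0
    # COCO dp_I part id 1 ('torso_back') maps to list index 0.
    print(int(1 - _DP_PART_ID_OFFSET))  # 0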
@@ -108,10 +142,23 @@ def create_tf_example(image,
       dictionary with keys: [u'keypoints', u'num_keypoints'] representing the
       keypoint information for this person object annotation. If None, then
       no keypoint annotations will be populated.
+    densepose_annotations_dict: A dictionary that maps from annotation_id to a
+      dictionary with keys: [u'dp_I', u'dp_x', u'dp_y', 'dp_U', 'dp_V']
+      representing part surface coordinates. For more information see
+      http://densepose.org/.
+    remove_non_person_annotations: Whether to remove any annotations that are
+      not the "person" class.
+    remove_non_person_images: Whether to remove any images that do not contain
+      at least one "person" annotation.

   Returns:
     key: SHA256 hash of the image.
     example: The converted tf.Example
     num_annotations_skipped: Number of (invalid) annotations that were ignored.
+    num_keypoint_annotation_skipped: Number of keypoint annotations that were
+      skipped.
+    num_densepose_annotation_skipped: Number of DensePose annotations that were
+      skipped.

   Raises:
     ValueError: if the image pointed to by data['filename'] is not a valid JPEG
...
...
@@ -146,6 +193,16 @@ def create_tf_example(image,
   num_annotations_skipped = 0
   num_keypoint_annotation_used = 0
   num_keypoint_annotation_skipped = 0
+  dp_part_index = []
+  dp_x = []
+  dp_y = []
+  dp_u = []
+  dp_v = []
+  dp_num_points = []
+  densepose_keys = ['dp_I', 'dp_U', 'dp_V', 'dp_x', 'dp_y', 'bbox']
+  include_densepose = densepose_annotations_dict is not None
+  num_densepose_annotation_used = 0
+  num_densepose_annotation_skipped = 0
   for object_annotations in annotations_list:
     (x, y, width, height) = tuple(object_annotations['bbox'])
     if width <= 0 or height <= 0:
...
...
@@ -154,14 +211,18 @@ def create_tf_example(image,
     if x + width > image_width or y + height > image_height:
       num_annotations_skipped += 1
       continue
+    category_id = int(object_annotations['category_id'])
+    category_name = category_index[category_id]['name'].encode('utf8')
+    if remove_non_person_annotations and category_name != b'person':
+      num_annotations_skipped += 1
+      continue
     xmin.append(float(x) / image_width)
     xmax.append(float(x + width) / image_width)
     ymin.append(float(y) / image_height)
     ymax.append(float(y + height) / image_height)
     is_crowd.append(object_annotations['iscrowd'])
-    category_id = int(object_annotations['category_id'])
     category_ids.append(category_id)
-    category_names.append(category_index[category_id]['name'].encode('utf8'))
+    category_names.append(category_name)
     area.append(object_annotations['area'])

     if include_masks:
...
...
@@ -197,6 +258,40 @@ def create_tf_example(image,
       keypoints_visibility.extend([0] * len(_COCO_KEYPOINT_NAMES))
       keypoints_name.extend(_COCO_KEYPOINT_NAMES)
       num_keypoints.append(0)
+    if include_densepose:
+      annotation_id = object_annotations['id']
+      if (annotation_id in densepose_annotations_dict and
+          all(key in densepose_annotations_dict[annotation_id]
+              for key in densepose_keys)):
+        dp_annotations = densepose_annotations_dict[annotation_id]
+        num_densepose_annotation_used += 1
+        dp_num_points.append(len(dp_annotations['dp_I']))
+        dp_part_index.extend([int(i - _DP_PART_ID_OFFSET)
+                              for i in dp_annotations['dp_I']])
+        # DensePose surface coordinates are defined on a [256, 256] grid
+        # relative to each instance box (i.e. absolute coordinates in range
+        # [0., 256.]). The following converts the coordinates
+        # so that they are expressed in normalized image coordinates.
+        dp_x_box_rel = [
+            clip_to_unit(val / 256.) for val in dp_annotations['dp_x']]
+        dp_x_norm = [(float(x) + x_box_rel * width) / image_width
+                     for x_box_rel in dp_x_box_rel]
+        dp_y_box_rel = [
+            clip_to_unit(val / 256.) for val in dp_annotations['dp_y']]
+        dp_y_norm = [(float(y) + y_box_rel * height) / image_height
+                     for y_box_rel in dp_y_box_rel]
+        dp_x.extend(dp_x_norm)
+        dp_y.extend(dp_y_norm)
+        dp_u.extend(dp_annotations['dp_U'])
+        dp_v.extend(dp_annotations['dp_V'])
+      else:
+        dp_num_points.append(0)

+  if (remove_non_person_images and
+      not any(name == b'person' for name in category_names)):
+    return (key, None, num_annotations_skipped,
+            num_keypoint_annotation_skipped, num_densepose_annotation_skipped)
   feature_dict = {
       'image/height': dataset_util.int64_feature(image_height),
...
...
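The coordinate conversion above is worth seeing with numbers: a dp_x of 128 on the [0, 256] box-relative grid is 0.5 of the box width; for a box at x = 64 with width 128 in a 256-pixel-wide image, that lands at (64 + 0.5 * 128) / 256 = 0.5 in normalized image coordinates. A standalone sketch of the same math (all values are illustrative):

    def clip_to_unit(v):
      return min(max(v, 0.0), 1.0)

    x, width, image_width = 64.0, 128.0, 256.0  # illustrative box and image
    dp_x = [0.0, 128.0, 256.0]                  # box-relative points on [0, 256]

    dp_x_box_rel = [clip_to_unit(val / 256.) for val in dp_x]
    dp_x_norm = [(x + x_box_rel * width) / image_width
                 for x_box_rel in dp_x_box_rel]
    print(dp_x_norm)  # [0.25, 0.5, 0.75]: the box spans [0.25, 0.75] of the image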
@@ -243,15 +338,34 @@ def create_tf_example(image,
         dataset_util.bytes_list_feature(keypoints_name))
     num_keypoint_annotation_skipped = (
         len(keypoint_annotations_dict) - num_keypoint_annotation_used)
+  if include_densepose:
+    feature_dict['image/object/densepose/num'] = (
+        dataset_util.int64_list_feature(dp_num_points))
+    feature_dict['image/object/densepose/part_index'] = (
+        dataset_util.int64_list_feature(dp_part_index))
+    feature_dict['image/object/densepose/x'] = (
+        dataset_util.float_list_feature(dp_x))
+    feature_dict['image/object/densepose/y'] = (
+        dataset_util.float_list_feature(dp_y))
+    feature_dict['image/object/densepose/u'] = (
+        dataset_util.float_list_feature(dp_u))
+    feature_dict['image/object/densepose/v'] = (
+        dataset_util.float_list_feature(dp_v))
+    num_densepose_annotation_skipped = (
+        len(densepose_annotations_dict) - num_densepose_annotation_used)

   example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
-  return key, example, num_annotations_skipped, num_keypoint_annotation_skipped
+  return (key, example, num_annotations_skipped,
+          num_keypoint_annotation_skipped, num_densepose_annotation_skipped)


 def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
                                             output_path, include_masks,
                                             num_shards,
-                                            keypoint_annotations_file=''):
+                                            keypoint_annotations_file='',
+                                            densepose_annotations_file='',
+                                            remove_non_person_annotations=False,
+                                            remove_non_person_images=False):
   """Loads COCO annotation json files and converts to tf.Record format.

   Args:
...
...
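All six DensePose fields above land in the flat tf.Example feature map as parallel lists, with image/object/densepose/num recording how many points belong to each box so the flat lists can be re-split per instance. A minimal sketch of that encoding, built directly with the proto API rather than the repo's dataset_util helpers (the values are illustrative):

    import tensorflow.compat.v1 as tf

    # Two boxes: the first contributes 2 surface points, the second 1.
    dp_num_points = [2, 1]
    dp_u = [0.1, 0.2, 0.3]  # flat, concatenated across boxes

    feature_dict = {
        'image/object/densepose/num': tf.train.Feature(
            int64_list=tf.train.Int64List(value=dp_num_points)),
        'image/object/densepose/u': tf.train.Feature(
            float_list=tf.train.FloatList(value=dp_u)),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    print(example.features.feature['image/object/densepose/num'].int64_list.value)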
@@ -264,6 +378,12 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
     keypoint_annotations_file: JSON file containing the person keypoint
       annotations. If empty, then no person keypoint annotations will be
       generated.
+    densepose_annotations_file: JSON file containing the DensePose annotations.
+      If empty, then no DensePose annotations will be generated.
+    remove_non_person_annotations: Whether to remove any annotations that are
+      not the "person" class.
+    remove_non_person_images: Whether to remove any images that do not contain
+      at least one "person" annotation.
   """
   with contextlib2.ExitStack() as tf_record_close_stack, \
       tf.gfile.GFile(annotations_file, 'r') as fid:
...
...
@@ -288,7 +408,8 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
       if image_id not in annotations_index:
        missing_annotation_count += 1
        annotations_index[image_id] = []
-    logging.info('%d images are missing annotations.', missing_annotation_count)
+    logging.info('%d images are missing annotations.',
+                 missing_annotation_count)

    keypoint_annotations_index = {}
    if keypoint_annotations_file:
...
...
@@ -301,8 +422,20 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
          keypoint_annotations_index[image_id] = {}
        keypoint_annotations_index[image_id][annotation['id']] = annotation

+    densepose_annotations_index = {}
+    if densepose_annotations_file:
+      with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
+        densepose_groundtruth_data = json.load(fid)
+      if 'annotations' in densepose_groundtruth_data:
+        for annotation in densepose_groundtruth_data['annotations']:
+          image_id = annotation['image_id']
+          if image_id not in densepose_annotations_index:
+            densepose_annotations_index[image_id] = {}
+          densepose_annotations_index[image_id][annotation['id']] = annotation

    total_num_annotations_skipped = 0
    total_num_keypoint_annotations_skipped = 0
+    total_num_densepose_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(images))
...
...
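The keypoint and DensePose indices built above share one two-level shape, index[image_id][annotation_id] = annotation, so the per-image lookup during example creation is a constant-time dict access. A minimal sketch of building such an index from a COCO-style annotation list (the records are illustrative):

    annotations = [  # illustrative COCO-style records
        {'id': 1000, 'image_id': 11, 'dp_I': [1, 2]},
        {'id': 1001, 'image_id': 11, 'dp_I': [3]},
        {'id': 2000, 'image_id': 12, 'dp_I': [4]},
    ]

    index = {}
    for annotation in annotations:
      image_id = annotation['image_id']
      if image_id not in index:
        index[image_id] = {}
      index[image_id][annotation['id']] = annotation

    print(sorted(index[11]))  # [1000, 1001]: both annotations for image 11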
@@ -312,19 +445,31 @@ def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
        keypoint_annotations_dict = {}
        if image['id'] in keypoint_annotations_index:
          keypoint_annotations_dict = keypoint_annotations_index[image['id']]
-      (_, tf_example, num_annotations_skipped,
-       num_keypoint_annotations_skipped) = create_tf_example(
-           image, annotations_list, image_dir, category_index, include_masks,
-           keypoint_annotations_dict)
+      densepose_annotations_dict = None
+      if densepose_annotations_file:
+        densepose_annotations_dict = {}
+        if image['id'] in densepose_annotations_index:
+          densepose_annotations_dict = densepose_annotations_index[image['id']]
+      (_, tf_example, num_annotations_skipped, num_keypoint_annotations_skipped,
+       num_densepose_annotations_skipped) = create_tf_example(
+           image, annotations_list, image_dir, category_index, include_masks,
+           keypoint_annotations_dict, densepose_annotations_dict,
+           remove_non_person_annotations, remove_non_person_images)
      total_num_annotations_skipped += num_annotations_skipped
      total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
+      total_num_densepose_annotations_skipped += (
+          num_densepose_annotations_skipped)
      shard_idx = idx % num_shards
-      output_tfrecords[shard_idx].write(tf_example.SerializeToString())
+      if tf_example:
+        output_tfrecords[shard_idx].write(tf_example.SerializeToString())
    logging.info('Finished writing, skipped %d annotations.',
                 total_num_annotations_skipped)
    if keypoint_annotations_file:
      logging.info('Finished writing, skipped %d keypoint annotations.',
                   total_num_keypoint_annotations_skipped)
+    if densepose_annotations_file:
+      logging.info('Finished writing, skipped %d DensePose annotations.',
+                   total_num_densepose_annotations_skipped)


 def main(_):
...
...
@@ -347,20 +492,26 @@ def main(_):
       train_output_path,
       FLAGS.include_masks,
       num_shards=100,
-      keypoint_annotations_file=FLAGS.train_keypoint_annotations_file)
+      keypoint_annotations_file=FLAGS.train_keypoint_annotations_file,
+      densepose_annotations_file=FLAGS.train_densepose_annotations_file,
+      remove_non_person_annotations=FLAGS.remove_non_person_annotations,
+      remove_non_person_images=FLAGS.remove_non_person_images)
   _create_tf_record_from_coco_annotations(
       FLAGS.val_annotations_file,
       FLAGS.val_image_dir,
       val_output_path,
       FLAGS.include_masks,
-      num_shards=100,
-      keypoint_annotations_file=FLAGS.val_keypoint_annotations_file)
+      num_shards=50,
+      keypoint_annotations_file=FLAGS.val_keypoint_annotations_file,
+      densepose_annotations_file=FLAGS.val_densepose_annotations_file,
+      remove_non_person_annotations=FLAGS.remove_non_person_annotations,
+      remove_non_person_images=FLAGS.remove_non_person_images)
   _create_tf_record_from_coco_annotations(
       FLAGS.testdev_annotations_file,
       FLAGS.test_image_dir,
       testdev_output_path,
       FLAGS.include_masks,
-      num_shards=100)
+      num_shards=50)


 if __name__ == '__main__':
...
...
research/object_detection/dataset_tools/create_coco_tf_record_test.py
View file @ 31ca3b97
...
...
@@ -89,7 +89,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
     }

     (_, example, num_annotations_skipped,
-     _) = create_coco_tf_record.create_tf_example(
+     _, _) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

     self.assertEqual(num_annotations_skipped, 0)
...
...
@@ -156,7 +156,7 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
     }

     (_, example, num_annotations_skipped,
-     _) = create_coco_tf_record.create_tf_example(
+     _, _) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index, include_masks=True)

     self.assertEqual(num_annotations_skipped, 0)
...
...
@@ -259,14 +259,14 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
         }
     }

-    (_, example, _,
-     num_keypoint_annotation_skipped) = create_coco_tf_record.create_tf_example(
-        image, annotations_list, image_dir, category_index, include_masks=False,
-        keypoint_annotations_dict=keypoint_annotations_dict)
+    _, example, _, num_keypoint_annotation_skipped, _ = (
+        create_coco_tf_record.create_tf_example(
+            image, annotations_list, image_dir, category_index,
+            include_masks=False,
+            keypoint_annotations_dict=keypoint_annotations_dict))

     self.assertEqual(num_keypoint_annotation_skipped, 0)
     self._assertProtoEqual(
...
...
@@ -310,6 +310,132 @@ class CreateCocoTFRecordTest(tf.test.TestCase):
         example.features.feature['image/object/keypoint/visibility']
         .int64_list.value, vv)

+  def test_create_tf_example_with_dense_pose(self):
+    image_dir = self.get_temp_dir()
+    image_file_name = 'tmp_image.jpg'
+    image_data = np.random.randint(
+        low=0, high=256, size=(256, 256, 3)).astype(np.uint8)
+    save_path = os.path.join(image_dir, image_file_name)
+    image = PIL.Image.fromarray(image_data, 'RGB')
+    image.save(save_path)
+
+    image = {
+        'file_name': image_file_name,
+        'height': 256,
+        'width': 256,
+        'id': 11,
+    }
+
+    min_x, min_y = 64, 64
+    max_x, max_y = 128, 128
+    keypoints = []
+    num_visible_keypoints = 0
+    xv = []
+    yv = []
+    vv = []
+    for _ in range(17):
+      xc = min_x + int(np.random.rand() * (max_x - min_x))
+      yc = min_y + int(np.random.rand() * (max_y - min_y))
+      vis = np.random.randint(0, 3)
+      xv.append(xc)
+      yv.append(yc)
+      vv.append(vis)
+      keypoints.extend([xc, yc, vis])
+      num_visible_keypoints += (vis > 0)
+
+    annotations_list = [{
+        'area': 0.5,
+        'iscrowd': False,
+        'image_id': 11,
+        'bbox': [64, 64, 128, 128],
+        'category_id': 1,
+        'id': 1000
+    }]
+
+    num_points = 45
+    dp_i = np.random.randint(1, 25, (num_points,)).astype(np.float32)
+    dp_u = np.random.randn(num_points)
+    dp_v = np.random.randn(num_points)
+    dp_x = np.random.rand(num_points) * 256.
+    dp_y = np.random.rand(num_points) * 256.
+    densepose_annotations_dict = {
+        1000: {
+            'dp_I': dp_i,
+            'dp_U': dp_u,
+            'dp_V': dp_v,
+            'dp_x': dp_x,
+            'dp_y': dp_y,
+            'bbox': [64, 64, 128, 128],
+        }
+    }
+
+    category_index = {1: {'name': 'person', 'id': 1}}
+
+    _, example, _, _, num_densepose_annotation_skipped = (
+        create_coco_tf_record.create_tf_example(
+            image, annotations_list, image_dir, category_index,
+            include_masks=False,
+            densepose_annotations_dict=densepose_annotations_dict))
+
+    self.assertEqual(num_densepose_annotation_skipped, 0)
+    self._assertProtoEqual(
+        example.features.feature['image/height'].int64_list.value, [256])
+    self._assertProtoEqual(
+        example.features.feature['image/width'].int64_list.value, [256])
+    self._assertProtoEqual(
+        example.features.feature['image/filename'].bytes_list.value,
+        [six.b(image_file_name)])
+    self._assertProtoEqual(
+        example.features.feature['image/source_id'].bytes_list.value,
+        [six.b(str(image['id']))])
+    self._assertProtoEqual(
+        example.features.feature['image/format'].bytes_list.value,
+        [six.b('jpeg')])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/xmin'].float_list.value,
+        [0.25])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/ymin'].float_list.value,
+        [0.25])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/xmax'].float_list.value,
+        [0.75])
+    self._assertProtoEqual(
+        example.features.feature['image/object/bbox/ymax'].float_list.value,
+        [0.75])
+    self._assertProtoEqual(
+        example.features.feature['image/object/class/text'].bytes_list.value,
+        [six.b('person')])
+    self._assertProtoEqual(
+        example.features.feature['image/object/densepose/num']
+        .int64_list.value, [num_points])
+    self.assertAllEqual(
+        example.features.feature['image/object/densepose/part_index']
+        .int64_list.value,
+        dp_i.astype(np.int64) - create_coco_tf_record._DP_PART_ID_OFFSET)
+    self.assertAllClose(
+        example.features.feature['image/object/densepose/u']
+        .float_list.value, dp_u)
+    self.assertAllClose(
+        example.features.feature['image/object/densepose/v']
+        .float_list.value, dp_v)
+    expected_dp_x = (64 + dp_x * 128. / 256.) / 256.
+    expected_dp_y = (64 + dp_y * 128. / 256.) / 256.
+    self.assertAllClose(
+        example.features.feature['image/object/densepose/x']
+        .float_list.value, expected_dp_x)
+    self.assertAllClose(
+        example.features.feature['image/object/densepose/y']
+        .float_list.value, expected_dp_y)

   def test_create_sharded_tf_record(self):
     tmp_dir = self.get_temp_dir()
     image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
...
...