Commit 3ce2f61b authored by Kaushik Shivakumar

Merge branch 'master' of https://github.com/tensorflow/models into context_tf2

parents bb16d5ca 8e9296ff
@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities, densepose_*} or
fields.InputDataFields.is_annotated.
Returns:
@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities, densepose_*} or
fields.InputDataFields.is_annotated.
Returns:
@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be
missing)
detection_masks: [batch, max_detections, mask_height, mask_width]
(optional)
detection_keypoints: [batch, max_detections, num_keypoints, 2]
(optional)
detection_keypoint_scores: [batch, max_detections, num_keypoints]
(optional)
detection_surface_coords: [batch, max_detections, mask_height,
mask_width, 2] (optional)
num_detections: [batch]
In addition to the above fields this stage also outputs the following
@@ -288,19 +293,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
"""
pass
def provide_groundtruth(
self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_dp_num_points_list=None,
groundtruth_dp_part_ids_list=None,
groundtruth_dp_surface_coords_list=None,
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
"""Provide groundtruth tensors. """Provide groundtruth tensors.
Args: Args:
@@ -324,6 +333,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
`groundtruth_keypoint_visibilities_list`).
groundtruth_keypoint_visibilities_list: a list of 2-D tf.bool tensors
of shape [num_boxes, num_keypoints] containing keypoint visibilities.
groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
[num_boxes] containing the number of DensePose sampled points.
groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
[num_boxes, max_sampled_points] containing the DensePose part ids
(0-indexed) for each sampled point. Note that there may be padding.
groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, max_sampled_points, 4] containing the DensePose
surface coordinates for each sampled point. Note that there may be
padding.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
@@ -361,6 +379,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
self._groundtruth_lists[
fields.BoxListFields.keypoint_visibilities] = (
groundtruth_keypoint_visibilities_list)
if groundtruth_dp_num_points_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_num_points] = (
groundtruth_dp_num_points_list)
if groundtruth_dp_part_ids_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_part_ids] = (
groundtruth_dp_part_ids_list)
if groundtruth_dp_surface_coords_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_surface_coords] = (
groundtruth_dp_surface_coords_list)
if groundtruth_is_crowd_list:
self._groundtruth_lists[
fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
...
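As a quick orientation for the new arguments, here is a minimal sketch of feeding DensePose groundtruth through this API. The `detection_model` instance and all tensor values are hypothetical; only the argument names and shapes come from the docstring above.

```python
import tensorflow as tf

# One image with two boxes; shapes follow the docstring above:
# [num_boxes], [num_boxes, max_sampled_points],
# [num_boxes, max_sampled_points, 4].
boxes = [tf.constant([[0.1, 0.1, 0.5, 0.5], [0.4, 0.4, 0.9, 0.9]])]
classes = [tf.constant([[0., 1.], [1., 0.]])]  # one-hot, num_classes = 2
dp_num_points = [tf.constant([2, 1], dtype=tf.int32)]  # valid points per box
dp_part_ids = [tf.constant([[3, 7], [0, 0]], dtype=tf.int32)]  # padded
dp_surface_coords = [tf.random.uniform([2, 2, 4])]  # (y, x, v, u) per point

# detection_model is any concrete DetectionModel subclass instance.
detection_model.provide_groundtruth(
    groundtruth_boxes_list=boxes,
    groundtruth_classes_list=classes,
    groundtruth_dp_num_points_list=dp_num_points,
    groundtruth_dp_part_ids_list=dp_part_ids,
    groundtruth_dp_surface_coords_list=dp_surface_coords)
```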
@@ -3984,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
@@ -4128,6 +4128,131 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
return return_values
def random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=None,
keypoints=None,
scale_min=0.1,
scale_max=2.0,
output_size=512,
resize_method=tf.image.ResizeMethod.BILINEAR,
seed=None):
"""Randomly scale, crop, and then pad an image to fixed square dimensions.
Randomly scale, crop, and then pad an image to the desired square output
dimensions. Specifically, this method first samples a random_scale factor
from a uniform distribution between scale_min and scale_max, and then resizes
the image such that its maximum dimension is (output_size * random_scale).
Secondly, a square output_size crop is extracted from the resized image
(note, this will only occur when random_scale > 1.0). Lastly, the cropped
region is padded to the desired square output_size, by filling with zeros.
The augmentation is borrowed from [1]
[1]: https://arxiv.org/abs/1911.09070
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
are in normalized form meaning their coordinates vary between [0, 1]. Each
row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop boundary
are clipped to the boundary and boxes falling outside the crop are
ignored.
labels: rank 1 int32 tensor containing the object classes.
label_weights: float32 tensor of shape [num_instances] representing the
weight for each box.
masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
width] containing instance masks. The masks are of the same height, width
as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
scale_min: float, the minimum value for the random scale factor.
scale_max: float, the maximum value for the random scale factor.
output_size: int, the desired (square) output image size.
resize_method: tf.image.ResizeMethod, resize method to use when scaling the
input images.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
label_weights: rank 1 float32 tensor with shape [num_instances].
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks. Only returned if the input `masks` is not None.
keypoints: rank 3 float32 tensor with shape [num_instances, num_keypoints,
2] in normalized coordinates. Only returned if the input `keypoints` is
not None.
"""
img_shape = tf.shape(image)
input_height, input_width = img_shape[0], img_shape[1]
random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)
# Compute the scaled height and width from the random scale.
max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)
# Compute the offsets:
offset_y = tf.cast(scaled_height - output_size, tf.float32)
offset_x = tf.cast(scaled_width - output_size, tf.float32)
offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
offset_y = tf.cast(offset_y, tf.int32)
offset_x = tf.cast(offset_x, tf.int32)
# Scale, crop, and pad the input image.
scaled_image = tf.image.resize_images(
image, [scaled_height, scaled_width], method=resize_method)
scaled_image = scaled_image[offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0, output_size,
output_size)
# Update the boxes.
new_window = tf.cast(
tf.stack([offset_y, offset_x,
offset_y + output_size, offset_x + output_size]),
dtype=tf.float32)
new_window /= tf.cast(
tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
dtype=tf.float32)
boxlist = box_list.BoxList(boxes)
boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
boxlist, indices = box_list_ops.prune_completely_outside_window(
boxlist, [0.0, 0.0, 1.0, 1.0])
boxlist = box_list_ops.clip_to_window(
boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)
return_values = [output_image, boxlist.get(),
tf.gather(labels, indices),
tf.gather(label_weights, indices)]
if masks is not None:
new_masks = tf.expand_dims(masks, -1)
new_masks = tf.image.resize_images(
new_masks, [scaled_height, scaled_width], method=resize_method)
new_masks = new_masks[:, offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
new_masks = tf.image.pad_to_bounding_box(
new_masks, 0, 0, output_size, output_size)
new_masks = tf.squeeze(new_masks, [-1])
return_values.append(tf.gather(new_masks, indices))
if keypoints is not None:
keypoints = tf.gather(keypoints, indices)
keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
keypoints = keypoint_ops.prune_outside_window(
keypoints, [0.0, 0.0, 1.0, 1.0])
return_values.append(keypoints)
return return_values
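The scale/offset arithmetic above is easy to sanity-check outside TensorFlow. A small NumPy sketch (values assumed: a 512x256 input, random_scale = 1.5, output_size = 512) mirrors the resize, crop, and pad geometry of the function:

```python
import numpy as np

input_height, input_width = 512, 256
output_size, random_scale = 512, 1.5

# Resize so the *maximum* dimension equals output_size * random_scale,
# preserving aspect ratio (cf. input_ar_y / input_ar_x above).
max_dim = max(input_height, input_width)
scaled_height = int(random_scale * output_size * input_height / max_dim)  # 768
scaled_width = int(random_scale * output_size * input_width / max_dim)    # 384

# A crop offset is only possible along dimensions larger than output_size;
# here offset_y is sampled in [0, 256) while offset_x is forced to 0.
rng = np.random.default_rng(0)
offset_y = int(max(0, scaled_height - output_size) * rng.uniform())
offset_x = int(max(0, scaled_width - output_size) * rng.uniform())

# After cropping, the 384-wide strip is zero-padded back out to 512x512.
print(scaled_height, scaled_width, offset_y, offset_x)
```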
def get_default_func_arg_map(include_label_weights=True,
include_label_confidences=False,
include_multiclass_scores=False,
@@ -4230,15 +4355,14 @@ def get_default_func_arg_map(include_label_weights=True,
random_adjust_saturation: (fields.InputDataFields.image,),
random_distort_color: (fields.InputDataFields.image,),
random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
random_crop_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_label_confidences,
multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints,
groundtruth_keypoint_visibilities, groundtruth_dp_num_points,
groundtruth_dp_part_ids, groundtruth_dp_surface_coords),
random_pad_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
@@ -4361,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
random_scale_crop_and_pad_to_square:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
}
return prep_func_arg_map
...
@@ -712,76 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
test_masks=True,
test_keypoints=True)
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self, include_dense_pose):
def graph_fn():
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3
image_width = 3
images = tf.random_uniform([1, image_height, image_width, 3])
boxes = self.createTestBoxes()
masks = self.createTestMasks()
keypoints, keypoint_visibilities = self.createTestKeypoints()
dp_num_point, dp_part_ids, dp_surface_coords = self.createTestDensePose()
keypoint_flip_permutation = self.createKeypointFlipPermutation()
tensor_dict = {
fields.InputDataFields.image:
images,
fields.InputDataFields.groundtruth_boxes:
boxes,
fields.InputDataFields.groundtruth_instance_masks:
masks,
fields.InputDataFields.groundtruth_keypoints:
keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities:
keypoint_visibilities
}
if include_dense_pose:
tensor_dict.update({
fields.InputDataFields.groundtruth_dp_num_points: dp_num_point,
fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords:
dp_surface_coords
})
preprocess_options = [(preprocessor.random_horizontal_flip, {
'keypoint_flip_permutation': keypoint_flip_permutation
})]
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True,
include_keypoints=True,
include_keypoint_visibilities=True,
include_dense_pose=include_dense_pose)
tensor_dict = preprocessor.preprocess(
tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
keypoint_visibilities = tensor_dict[
fields.InputDataFields.groundtruth_keypoint_visibilities]
output_tensors = [boxes, masks, keypoints, keypoint_visibilities]
if include_dense_pose:
dp_num_points = tensor_dict[
fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = tensor_dict[
fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = tensor_dict[
fields.InputDataFields.groundtruth_dp_surface_coords]
output_tensors.extend([dp_num_points, dp_part_ids, dp_surface_coords])
return output_tensors
output_tensors = self.execute_cpu(graph_fn, [])
self.assertIsNotNone(output_tensors[0]) # Boxes.
self.assertIsNotNone(output_tensors[1]) # Masks.
self.assertIsNotNone(output_tensors[2]) # Keypoints
self.assertIsNotNone(output_tensors[3]) # Keypoint Visibilities.
if include_dense_pose:
self.assertIsNotNone(output_tensors[4]) # DensePose Num Points.
self.assertIsNotNone(output_tensors[5]) # DensePose Part IDs.
self.assertIsNotNone(output_tensors[6]) # DensePose Surface Coords
def testRandomVerticalFlip(self):
@@ -2380,7 +2310,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
def graph_fn():
@@ -3912,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
size = max(image.shape)
self.assertAlmostEqual(scale * 256.0, size)
self.assertAllClose(image[:, :, 0], masks[0, :, :])
@parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
('scale_2_0', 2.0))
def test_random_scale_crop_and_pad_to_square(self, scale):
def graph_fn():
image = np.random.randn(512, 256, 1)
box_centers = [0.25, 0.5, 0.75]
box_size = 0.1
box_corners = []
box_labels = []
box_label_weights = []
keypoints = []
masks = []
for center_y in box_centers:
for center_x in box_centers:
box_corners.append(
[center_y - box_size / 2.0, center_x - box_size / 2.0,
center_y + box_size / 2.0, center_x + box_size / 2.0])
box_labels.append([1])
box_label_weights.append([1.])
keypoints.append(
[[center_y - box_size / 2.0, center_x - box_size / 2.0],
[center_y + box_size / 2.0, center_x + box_size / 2.0]])
masks.append(image[:, :, 0].reshape(512, 256))
image = tf.constant(image)
boxes = tf.constant(box_corners)
labels = tf.constant(box_labels)
label_weights = tf.constant(box_label_weights)
keypoints = tf.constant(keypoints)
masks = tf.constant(np.stack(masks))
(new_image, new_boxes, _, _, new_masks,
new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=masks,
keypoints=keypoints,
scale_min=scale,
scale_max=scale,
output_size=512)
return new_image, new_boxes, new_masks, new_keypoints
image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])
# Since random_scale_crop_and_pad_to_square may prune and clip boxes,
# we only need to find one of the boxes that was not clipped and check
# that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
# is equivalent to round(a-b, 7) == 0.
any_box_has_correct_size = False
effective_scale_y = int(scale * 512) / 512.0
effective_scale_x = int(scale * 256) / 512.0
expected_size_y = 0.1 * effective_scale_y
expected_size_x = 0.1 * effective_scale_x
for box in boxes:
ymin, xmin, ymax, xmax = box
any_box_has_correct_size |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_box_has_correct_size)
# Similar to the approach above where we check for at least one box with the
# expected dimensions, we check for at least one pair of keypoints whose
# distance matches the expected dimensions.
any_keypoint_pair_has_correct_dist = False
for keypoint_pair in keypoints:
ymin, xmin = keypoint_pair[0]
ymax, xmax = keypoint_pair[1]
any_keypoint_pair_has_correct_dist |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_keypoint_pair_has_correct_dist)
self.assertAlmostEqual(512.0, image.shape[0])
self.assertAlmostEqual(512.0, image.shape[1])
self.assertAllClose(image[:, :, 0],
masks[0, :, :])
...
@@ -141,6 +141,8 @@ class DetectionResultFields(object):
for detection boxes in the image including background class.
detection_classes: detection-level class labels.
detection_masks: contains a segmentation mask for each detection box.
detection_surface_coords: contains DensePose surface coordinates for each
box.
detection_boundaries: contains an object boundary for each detection box.
detection_keypoints: contains detection keypoints for each detection box.
detection_keypoint_scores: contains detection keypoint scores.
@@ -161,6 +163,7 @@ class DetectionResultFields(object):
detection_features = 'detection_features'
detection_classes = 'detection_classes'
detection_masks = 'detection_masks'
detection_surface_coords = 'detection_surface_coords'
detection_boundaries = 'detection_boundaries'
detection_keypoints = 'detection_keypoints'
detection_keypoint_scores = 'detection_keypoint_scores'
@@ -182,7 +185,11 @@ class BoxListFields(object):
masks: masks per bounding box.
boundaries: boundaries per bounding box.
keypoints: keypoints per bounding box.
keypoint_visibilities: keypoint visibilities per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
densepose_num_points: number of DensePose points per bounding box.
densepose_part_ids: DensePose part ids per bounding box.
densepose_surface_coords: DensePose surface coordinates per bounding box.
is_crowd: is_crowd annotation per bounding box.
"""
boxes = 'boxes'
@@ -196,6 +203,9 @@ class BoxListFields(object):
keypoints = 'keypoints'
keypoint_visibilities = 'keypoint_visibilities'
keypoint_heatmaps = 'keypoint_heatmaps'
densepose_num_points = 'densepose_num_points'
densepose_part_ids = 'densepose_part_ids'
densepose_surface_coords = 'densepose_surface_coords'
is_crowd = 'is_crowd'
group_of = 'group_of'
...
@@ -45,6 +45,7 @@ from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import densepose_ops
from object_detection.core import keypoint_ops
from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
@@ -799,17 +800,15 @@ def get_batch_predictions_from_indices(batch_predictions, indices):
function.
Args:
batch_predictions: A tensor of shape [batch_size, height, width, channels]
or [batch_size, height, width, class, channels] for class-specific
features (e.g. keypoint joint offsets).
indices: A tensor of shape [num_instances, 3] for single-class features or
[num_instances, 4] for class-specific features.
Returns:
values: A tensor of shape [num_instances, channels] holding the predicted
values at the given indices.
"""
return tf.gather_nd(batch_predictions, indices)
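Because the helper is a thin wrapper around tf.gather_nd, a short sketch with made-up shapes shows what the indices select:

```python
import tensorflow as tf

# Batch of 2 feature maps, 4x4 spatial, 2 channels (e.g. y/x offsets).
batch_predictions = tf.reshape(tf.range(2 * 4 * 4 * 2, dtype=tf.float32),
                               [2, 4, 4, 2])
# Each row is [batch, y, x]: pick (1, 2) in image 0 and (3, 0) in image 1.
indices = tf.constant([[0, 1, 2], [1, 3, 0]])

values = tf.gather_nd(batch_predictions, indices)
print(values.shape)  # (2, 2): one [y_offset, x_offset] pair per instance
```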
@@ -1657,3 +1656,118 @@ class CenterNetMaskTargetAssigner(object):
segmentation_target = tf.stack(segmentation_targets_list, axis=0)
return segmentation_target
class CenterNetDensePoseTargetAssigner(object):
"""Wrapper to compute targets for DensePose task."""
def __init__(self, stride, num_parts=24):
self._stride = stride
self._num_parts = num_parts
def assign_part_and_coordinate_targets(self,
height,
width,
gt_dp_num_points_list,
gt_dp_part_ids_list,
gt_dp_surface_coords_list,
gt_weights_list=None):
"""Returns the DensePose part_id and coordinate targets and their indices.
The returned values are expected to be used with predicted tensors
of size (batch_size, height//self._stride, width//self._stride, 2). The
predicted values at the relevant indices can be retrieved with the
get_batch_predictions_from_indices function.
Args:
height: int, height of input to the model. This is used to determine the
height of the output.
width: int, width of the input to the model. This is used to determine the
width of the output.
gt_dp_num_points_list: a list of 1-D tf.int32 tensors of shape [num_boxes]
containing the number of DensePose sampled points per box.
gt_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
[num_boxes, max_sampled_points] containing the DensePose part ids
(0-indexed) for each sampled point. Note that there may be padding, as
boxes may contain a different number of sampled points.
gt_dp_surface_coords_list: a list of 3-D tf.float32 tensors of shape
[num_boxes, max_sampled_points, 4] containing the DensePose surface
coordinates (normalized) for each sampled point. Note that there may be
padding.
gt_weights_list: A list of 1-D tensors with shape [num_boxes]
corresponding to the weight of each groundtruth detection box.
Returns:
batch_indices: an integer tensor of shape [num_total_points, 4] holding
the indices inside the predicted tensor which should be penalized. The
first column indicates the index along the batch dimension and the
second and third columns indicate the index along the y and x
dimensions respectively. The fourth column is the part index.
batch_part_ids: an int tensor of shape [num_total_points, num_parts]
holding 1-hot encodings of parts for each sampled point.
batch_surface_coords: a float tensor of shape [num_total_points, 2]
holding the expected (v, u) coordinates for each sampled point.
batch_weights: a float tensor of shape [num_total_points] indicating the
weight of each prediction.
Note that num_total_points = batch_size * num_boxes * max_sampled_points.
"""
if gt_weights_list is None:
gt_weights_list = [None] * len(gt_dp_num_points_list)
batch_indices = []
batch_part_ids = []
batch_surface_coords = []
batch_weights = []
for i, (num_points, part_ids, surface_coords, weights) in enumerate(
zip(gt_dp_num_points_list, gt_dp_part_ids_list,
gt_dp_surface_coords_list, gt_weights_list)):
num_boxes, max_sampled_points = (
shape_utils.combined_static_and_dynamic_shape(part_ids))
part_ids_flattened = tf.reshape(part_ids, [-1])
part_ids_one_hot = tf.one_hot(part_ids_flattened, depth=self._num_parts)
# Get DensePose coordinates in the output space.
surface_coords_abs = densepose_ops.to_absolute_coordinates(
surface_coords, height // self._stride, width // self._stride)
surface_coords_abs = tf.reshape(surface_coords_abs, [-1, 4])
# Each tensor has shape [num_boxes * max_sampled_points].
yabs, xabs, v, u = tf.unstack(surface_coords_abs, axis=-1)
# Get the indices (in output space) for the DensePose coordinates. Note
# that if self._stride is larger than 1, this will have the effect of
# reducing spatial resolution of the groundtruth points.
indices_y = tf.cast(yabs, tf.int32)
indices_x = tf.cast(xabs, tf.int32)
# Assign ones if weights are not provided.
if weights is None:
weights = tf.ones(num_boxes, dtype=tf.float32)
# Create per-point weights.
weights_per_point = tf.reshape(
tf.tile(weights[:, tf.newaxis], multiples=[1, max_sampled_points]),
shape=[-1])
# Mask out invalid (i.e. padded) DensePose points.
num_points_tiled = tf.tile(num_points[:, tf.newaxis],
multiples=[1, max_sampled_points])
range_tiled = tf.tile(tf.range(max_sampled_points)[tf.newaxis, :],
multiples=[num_boxes, 1])
valid_points = tf.math.less(range_tiled, num_points_tiled)
valid_points = tf.cast(tf.reshape(valid_points, [-1]), dtype=tf.float32)
weights_per_point = weights_per_point * valid_points
# Shape of [num_boxes * max_sampled_points] integer tensor filled with
# current batch index.
batch_index = i * tf.ones_like(indices_y, dtype=tf.int32)
batch_indices.append(
tf.stack([batch_index, indices_y, indices_x, part_ids_flattened],
axis=1))
batch_part_ids.append(part_ids_one_hot)
batch_surface_coords.append(tf.stack([v, u], axis=1))
batch_weights.append(weights_per_point)
batch_indices = tf.concat(batch_indices, axis=0)
batch_part_ids = tf.concat(batch_part_ids, axis=0)
batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
batch_weights = tf.concat(batch_weights, axis=0)
return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
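The four outputs above are designed to be consumed through get_batch_predictions_from_indices. A minimal loss sketch, assuming (hypothetically) that the model emits per-part (v, u) regressions of shape [batch, height // stride, width // stride, num_parts, 2]; the real CenterNet head layout may differ:

```python
import tensorflow as tf

# Hypothetical prediction tensor at stride 4 for a 120x80 input, 24 parts.
surface_coord_preds = tf.zeros([2, 30, 20, 24, 2], dtype=tf.float32)

# batch_indices columns are [batch, y, x, part], so gather_nd selects the
# predicted (v, u) pair at each sampled point's location and part channel.
# batch_indices, batch_surface_coords and batch_weights come from
# assign_part_and_coordinate_targets above.
point_preds = tf.gather_nd(surface_coord_preds, batch_indices)

# Weighted L1 loss against the assigned (v, u) targets; padded points drop
# out because the assigner gives them zero weight.
loss = tf.reduce_sum(
    batch_weights[:, tf.newaxis] *
    tf.abs(point_preds - batch_surface_coords))
```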
@@ -1906,6 +1906,99 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
expected_seg_target, segmentation_target)
class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):
def test_assign_part_and_coordinate_targets(self):
def graph_fn():
gt_dp_num_points_list = [
# Example 0.
tf.constant([2, 0, 3], dtype=tf.int32),
# Example 1.
tf.constant([1, 1], dtype=tf.int32),
]
gt_dp_part_ids_list = [
# Example 0.
tf.constant([[1, 6, 0],
[0, 0, 0],
[0, 2, 3]], dtype=tf.int32),
# Example 1.
tf.constant([[7, 0, 0],
[0, 0, 0]], dtype=tf.int32),
]
gt_dp_surface_coords_list = [
# Example 0.
tf.constant(
[[[0.11, 0.2, 0.3, 0.4], # Box 0.
[0.6, 0.4, 0.1, 0.0],
[0.0, 0.0, 0.0, 0.0]],
[[0.0, 0.0, 0.0, 0.0], # Box 1.
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0]],
[[0.22, 0.1, 0.6, 0.8], # Box 2.
[0.0, 0.4, 0.5, 1.0],
[0.3, 0.2, 0.4, 0.1]]],
dtype=tf.float32),
# Example 1.
tf.constant(
[[[0.5, 0.5, 0.3, 1.0], # Box 0.
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0]],
[[0.2, 0.2, 0.5, 0.8], # Box 1.
[0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0]]],
dtype=tf.float32),
]
gt_weights_list = [
# Example 0.
tf.constant([1.0, 1.0, 0.5], dtype=tf.float32),
# Example 1.
tf.constant([0.0, 1.0], dtype=tf.float32),
]
cn_assigner = targetassigner.CenterNetDensePoseTargetAssigner(stride=4)
batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
cn_assigner.assign_part_and_coordinate_targets(
height=120,
width=80,
gt_dp_num_points_list=gt_dp_num_points_list,
gt_dp_part_ids_list=gt_dp_part_ids_list,
gt_dp_surface_coords_list=gt_dp_surface_coords_list,
gt_weights_list=gt_weights_list))
return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
self.execute(graph_fn, []))
expected_batch_indices = np.array([
# Example 0. e.g.
# The first set of indices is calculated as follows:
# floor(0.11*120/4) = 3, floor(0.2*80/4) = 4.
[0, 3, 4, 1], [0, 18, 8, 6], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
[0, 0, 0, 0], [0, 6, 2, 0], [0, 0, 8, 2], [0, 9, 4, 3],
# Example 1.
[1, 15, 10, 7], [1, 0, 0, 0], [1, 0, 0, 0], [1, 6, 4, 0], [1, 0, 0, 0],
[1, 0, 0, 0]
], dtype=np.int32)
expected_batch_part_ids = tf.one_hot(
[1, 6, 0, 0, 0, 0, 0, 2, 3, 7, 0, 0, 0, 0, 0], depth=24).numpy()
expected_batch_surface_coords = np.array([
# Box 0.
[0.3, 0.4], [0.1, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
[0.6, 0.8], [0.5, 1.0], [0.4, 0.1],
# Box 1.
[0.3, 1.0], [0.0, 0.0], [0.0, 0.0], [0.5, 0.8], [0.0, 0.0], [0.0, 0.0],
], np.float32)
expected_batch_weights = np.array([
# Box 0.
1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5,
# Box 1.
0.0, 0.0, 0.0, 1.0, 0.0, 0.0
], dtype=np.float32)
self.assertAllEqual(expected_batch_indices, batch_indices)
self.assertAllEqual(expected_batch_part_ids, batch_part_ids)
self.assertAllClose(expected_batch_surface_coords, batch_surface_coords)
self.assertAllClose(expected_batch_weights, batch_weights)
if __name__ == '__main__':
tf.enable_v2_behavior()
tf.test.main()
@@ -50,14 +50,16 @@ import io
import itertools
import json
import os
import numpy as np
import PIL.Image
import six
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class ReKeyDataFn(beam.DoFn):
"""Re-keys tfrecords by sequence_key.
@@ -932,4 +934,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
main()
@@ -22,7 +22,7 @@ import datetime
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
@@ -31,6 +31,12 @@ from object_detection.dataset_tools.context_rcnn import add_context_to_examples
from object_detection.utils import tf_version
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
@contextlib.contextmanager
def InMemoryTFRecord(entries):
temp = tempfile.NamedTemporaryFile(delete=False)
...
@@ -39,13 +39,16 @@ import io
import json
import logging
import os
import numpy as np
import PIL.Image
import tensorflow.compat.v1 as tf
from object_detection.utils import dataset_util
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class ParseImage(beam.DoFn):
"""A DoFn that parses a COCO-CameraTraps json and emits TFRecords."""
@@ -338,4 +341,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
main()
@@ -22,7 +22,6 @@ import os
import tempfile
import unittest
import numpy as np
from PIL import Image
@@ -30,6 +29,11 @@ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools.context_rcnn import create_cococameratraps_tfexample_main
from object_detection.utils import tf_version
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class CreateCOCOCameraTrapsTfexampleTest(tf.test.TestCase):
...
@@ -48,9 +48,11 @@ from __future__ import print_function
import argparse
import os
import threading
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class GenerateDetectionDataFn(beam.DoFn):
@@ -290,4 +292,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
main()
@@ -22,7 +22,6 @@ import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
@@ -39,6 +38,11 @@ if six.PY2:
else:
mock = unittest.mock
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
...
@@ -34,7 +34,8 @@ python tensorflow_models/object_detection/export_inference_graph.py \
--input_type tf_example \
--pipeline_config_path path/to/faster_rcnn_model.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory \
--additional_output_tensor_names detection_features
python generate_embedding_data.py \
--alsologtostderr \
@@ -52,13 +53,15 @@ import datetime
import os
import threading
import numpy as np
import six
import tensorflow.compat.v1 as tf
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class GenerateEmbeddingDataFn(beam.DoFn):
"""Generates embedding data for camera trap images.
@@ -410,5 +413,7 @@ def main(argv=None, save_main_session=True):
p.run()
if __name__ == '__main__':
main()
@@ -21,7 +21,6 @@ import contextlib
import os
import tempfile
import unittest
import numpy as np
import six
import tensorflow.compat.v1 as tf
@@ -38,6 +37,11 @@ if six.PY2:
else:
mock = unittest.mock
try:
import apache_beam as beam # pylint:disable=g-import-not-at-top
except ModuleNotFoundError:
pass
class FakeModel(model.DetectionModel):
"""A Fake Detection model with expected output nodes from post-processing."""
...
@@ -51,25 +51,25 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
feature_map = {
standard_fields.TfExampleFields.object_bbox_ymin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_ymax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMax.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMax.to_numpy()),
standard_fields.TfExampleFields.object_class_text:
dataset_util.bytes_list_feature([
six.ensure_binary(label_text)
for label_text in filtered_data_frame_boxes.LabelName.to_numpy()
]),
standard_fields.TfExampleFields.object_class_label:
dataset_util.int64_list_feature(
filtered_data_frame_boxes.LabelName.map(
lambda x: label_map[x]).to_numpy()),
standard_fields.TfExampleFields.filename:
dataset_util.bytes_feature(
six.ensure_binary('{}.jpg'.format(image_id))),
@@ -82,31 +82,31 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
if 'IsGroupOf' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_group_of] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsGroupOf.to_numpy().astype(int))
if 'IsOccluded' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_occluded] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsOccluded.to_numpy().astype(
int))
if 'IsTruncated' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_truncated] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsTruncated.to_numpy().astype(
int))
if 'IsDepiction' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_depiction] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsDepiction.to_numpy().astype(
int))
if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
feature_map[standard_fields.TfExampleFields.
image_class_label] = dataset_util.int64_list_feature(
filtered_data_frame_labels.LabelName.map(
lambda x: label_map[x]).to_numpy())
feature_map[standard_fields.TfExampleFields
.image_class_text] = dataset_util.bytes_list_feature([
six.ensure_binary(label_text) for label_text in
filtered_data_frame_labels.LabelName.to_numpy()
]),
return tf.train.Example(features=tf.train.Features(feature=feature_map))
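The only substantive change in this file is the pandas migration: DataFrame/Series.as_matrix() was deprecated and then removed in pandas 1.0, and to_numpy() is its drop-in replacement. A tiny illustration:

```python
import pandas as pd

s = pd.Series([0.1, 0.2, 0.3], name='YMin')
# s.as_matrix()   # AttributeError on pandas >= 1.0
arr = s.to_numpy()  # array([0.1, 0.2, 0.3]), same result as the old call
```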
@@ -25,20 +25,17 @@ RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
WORKDIR /home/tensorflow/models/research/
RUN cp object_detection/packages/tf1/setup.py ./
ENV PATH="/home/tensorflow/.local/bin:${PATH}"
RUN python -m pip install --user -U pip
RUN python -m pip install --user .
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# TensorFlow Object Detection on Docker
These instructions are experimental.
@@ -6,6 +6,6 @@ These instructions are experimental.
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/tf1/Dockerfile -t od .
docker run -it od
```
@@ -25,20 +25,17 @@ RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
WORKDIR /home/tensorflow/models/research/
RUN cp object_detection/packages/tf2/setup.py ./
ENV PATH="/home/tensorflow/.local/bin:${PATH}"
RUN python -m pip install -U pip
RUN python -m pip install .
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# TensorFlow Object Detection on Docker
These instructions are experimental.
@@ -6,6 +6,6 @@ These instructions are experimental.
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/tf2/Dockerfile -t od .
docker run -it od
```
@@ -552,7 +552,11 @@ def _resize_detection_masks(args):
detection_boxes, detection_masks, image_shape = args
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[0], image_shape[1])
# If the masks are currently float, binarize them. Otherwise keep them as
# integers, since they have already been thresholded.
if detection_masks_reframed.dtype == tf.float32:
detection_masks_reframed = tf.greater(detection_masks_reframed, 0.5)
return tf.cast(detection_masks_reframed, tf.uint8)
def _resize_groundtruth_masks(args):
@@ -570,6 +574,17 @@ def _resize_groundtruth_masks(args):
return tf.cast(tf.squeeze(mask, 3), tf.uint8)
def _resize_surface_coordinate_masks(args):
detection_boxes, surface_coords, image_shape = args
surface_coords_v, surface_coords_u = tf.unstack(surface_coords, axis=-1)
surface_coords_v_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_v, detection_boxes, image_shape[0], image_shape[1])
surface_coords_u_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_u, detection_boxes, image_shape[0], image_shape[1])
return tf.stack([surface_coords_v_reframed, surface_coords_u_reframed],
axis=-1)
def _scale_keypoint_to_absolute(args):
keypoints, image_shape = args
return keypoint_ops.scale(keypoints, image_shape[0], image_shape[1])
@@ -720,6 +735,12 @@ def result_dict_for_batched_example(images,
num_keypoints] bool tensor with keypoint visibilities (Optional).
'groundtruth_labeled_classes': [batch_size, num_classes] int64
tensor of 1-indexed classes. (Optional)
'groundtruth_dp_num_points': [batch_size, max_number_of_boxes] int32
tensor. (Optional)
'groundtruth_dp_part_ids': [batch_size, max_number_of_boxes,
max_sampled_points] int32 tensor. (Optional)
'groundtruth_dp_surface_coords': [batch_size, max_number_of_boxes,
max_sampled_points, 4] float32 tensor. (Optional)
class_agnostic: Boolean indicating whether the detections are class-agnostic
(i.e. binary). Default False.
scale_to_absolute: Boolean indicating whether boxes and keypoints should be
@@ -747,12 +768,16 @@ def result_dict_for_batched_example(images,
'detection_scores': [batch_size, max_detections] float32 tensor of scores.
'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
classes.
'detection_masks': [batch_size, max_detections, H, W] uint8 tensor of
instance masks, reframed to full image masks. Note that these may be
binarized (e.g. {0, 1}), or may contain 1-indexed part labels. (Optional)
'detection_keypoints': [batch_size, max_detections, num_keypoints, 2]
float32 tensor containing keypoint coordinates. (Optional)
'detection_keypoint_scores': [batch_size, max_detections, num_keypoints]
float32 tensor containing keypoint scores. (Optional)
'detection_surface_coords': [batch_size, max_detections, H, W, 2] float32
tensor with normalized surface coordinates (e.g. DensePose UV
coordinates). (Optional)
'num_detections': [batch_size] int64 tensor containing number of valid
detections.
'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
@@ -844,14 +869,21 @@ def result_dict_for_batched_example(images,
if detection_fields.detection_masks in detections:
detection_masks = detections[detection_fields.detection_masks]
output_dict[detection_fields.detection_masks] = (
shape_utils.static_or_dynamic_map_fn(
_resize_detection_masks,
elems=[detection_boxes, detection_masks,
original_image_spatial_shapes],
dtype=tf.uint8))
if detection_fields.detection_surface_coords in detections:
detection_surface_coords = detections[
detection_fields.detection_surface_coords]
output_dict[detection_fields.detection_surface_coords] = (
shape_utils.static_or_dynamic_map_fn(
_resize_surface_coordinate_masks,
elems=[detection_boxes, detection_surface_coords,
original_image_spatial_shapes],
dtype=tf.float32))
if detection_fields.detection_keypoints in detections:
detection_keypoints = detections[detection_fields.detection_keypoints]
@@ -1074,3 +1106,8 @@ def evaluator_options_from_eval_config(eval_config):
'recall_upper_bound': (eval_config.recall_upper_bound)
}
return evaluator_options
def has_densepose(eval_dict):
"""Returns True if the eval_dict contains DensePose detection fields."""
return (fields.DetectionResultFields.detection_masks in eval_dict and
fields.DetectionResultFields.detection_surface_coords in eval_dict)