pull latest

657dcda5 · Kaushik Shivakumar · 26e24e21 · e6017471 · 657dcda5 · 657dcda5
Commit 657dcda5 authored Jul 01, 2020 by Kaushik Shivakumar
20 changed files
--- a/research/object_detection/core/standard_fields.py
+++ b/research/object_detection/core/standard_fields.py
@@ -66,6 +66,11 @@ class InputDataFields(object):
    groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
    groundtruth_label_weights: groundtruth label weights.
    groundtruth_weights: groundtruth weight factor for bounding boxes.
+    groundtruth_dp_num_points: The number of DensePose sampled points for each
+      instance.
+    groundtruth_dp_part_ids: Part indices for DensePose points.
+    groundtruth_dp_surface_coords: Image locations and UV coordinates for
+      DensePose points.
    num_groundtruth_boxes: number of groundtruth boxes.
    is_annotated: whether an image has been labeled or not.
    true_image_shapes: true shapes of images in the resized images, as resized
@@ -108,6 +113,9 @@ class InputDataFields(object):
  groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
  groundtruth_label_weights = 'groundtruth_label_weights'
  groundtruth_weights = 'groundtruth_weights'
+  groundtruth_dp_num_points = 'groundtruth_dp_num_points'
+  groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
+  groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
  num_groundtruth_boxes = 'num_groundtruth_boxes'
  is_annotated = 'is_annotated'
  true_image_shape = 'true_image_shape'

--- a/research/object_detection/data_decoders/tf_example_decoder.py
+++ b/research/object_detection/data_decoders/tf_example_decoder.py
@@ -30,6 +30,7 @@ from object_detection.core import data_decoder
 from object_detection.core import standard_fields as fields
 from object_detection.protos import input_reader_pb2
 from object_detection.utils import label_map_util
+from object_detection.utils import shape_utils
 # pylint: disable=g-import-not-at-top
 try:
@@ -170,7 +171,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
               num_additional_channels=0,
               load_multiclass_scores=False,
               load_context_features=False,
-               expand_hierarchy_labels=False):
+               expand_hierarchy_labels=False,
+               load_dense_pose=False):
    """Constructor sets keys_to_features and items_to_handlers.
    Args:
@@ -201,6 +203,7 @@ class TfExampleDecoder(data_decoder.DataDecoder):
        account the provided hierarchy in the label_map_proto_file. For positive
        classes, the labels are extended to ancestor. For negative classes,
        the labels are expanded to descendants.
+      load_dense_pose: Whether to load DensePose annotations.
    Raises:
      ValueError: If `instance_mask_type` option is not one of
@@ -371,6 +374,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
                    self._decode_png_instance_masks))
      else:
        raise ValueError('Did not recognize the `instance_mask_type` option.')
+    if load_dense_pose:
+      self.keys_to_features['image/object/densepose/num'] = (
+          tf.VarLenFeature(tf.int64))
+      self.keys_to_features['image/object/densepose/part_index'] = (
+          tf.VarLenFeature(tf.int64))
+      self.keys_to_features['image/object/densepose/x'] = (
+          tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/densepose/y'] = (
+          tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/densepose/u'] = (
+          tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/densepose/v'] = (
+          tf.VarLenFeature(tf.float32))
+      self.items_to_handlers[
+          fields.InputDataFields.groundtruth_dp_num_points] = (
+              slim_example_decoder.Tensor('image/object/densepose/num'))
+      self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
+          slim_example_decoder.ItemHandlerCallback(
+              ['image/object/densepose/part_index',
+               'image/object/densepose/num'], self._dense_pose_part_indices))
+      self.items_to_handlers[
+          fields.InputDataFields.groundtruth_dp_surface_coords] = (
+              slim_example_decoder.ItemHandlerCallback(
+                  ['image/object/densepose/x', 'image/object/densepose/y',
+                   'image/object/densepose/u', 'image/object/densepose/v',
+                   'image/object/densepose/num'],
+                  self._dense_pose_surface_coordinates))
    if label_map_proto_file:
      # If the label_map_proto is provided, try to use it in conjunction with
      # the class text, and fall back to a materialized ID.
@@ -547,6 +578,14 @@ class TfExampleDecoder(data_decoder.DataDecoder):
      group_of = fields.InputDataFields.groundtruth_group_of
      tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
+    if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
+      tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
+          tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
+          dtype=tf.int32)
+      tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
+          tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
+          dtype=tf.int32)
    return tensor_dict
  def _reshape_keypoints(self, keys_to_tensors):
@@ -697,6 +736,97 @@ class TfExampleDecoder(data_decoder.DataDecoder):
        lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
        lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
+  def _dense_pose_part_indices(self, keys_to_tensors):
+    """Creates a tensor that contains part indices for each DensePose point.
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors.
+    Returns:
+      A 2-D int32 tensor of shape [num_instances, num_points] where each element
+      contains the DensePose part index (0-23). The value `num_points`
+      corresponds to the maximum number of sampled points across all instances
+      in the image. Note that instances with fewer sampled points will be
+      padded with zeros in the last dimension.
+    """
+    num_points_per_instances = keys_to_tensors['image/object/densepose/num']
+    part_index = keys_to_tensors['image/object/densepose/part_index']
+    if isinstance(num_points_per_instances, tf.SparseTensor):
+      num_points_per_instances = tf.sparse_tensor_to_dense(
+          num_points_per_instances)
+    if isinstance(part_index, tf.SparseTensor):
+      part_index = tf.sparse_tensor_to_dense(part_index)
+    part_index = tf.cast(part_index, dtype=tf.int32)
+    max_points_per_instance = tf.cast(
+        tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
+    num_points_cumulative = tf.concat([
+        [0], tf.math.cumsum(num_points_per_instances)], axis=0)
+    def pad_parts_tensor(instance_ind):
+      points_range_start = num_points_cumulative[instance_ind]
+      points_range_end = num_points_cumulative[instance_ind + 1]
+      part_inds = part_index[points_range_start:points_range_end]
+      return shape_utils.pad_or_clip_nd(part_inds,
+                                        output_shape=[max_points_per_instance])
+    return tf.map_fn(pad_parts_tensor,
+                     tf.range(tf.size(num_points_per_instances)),
+                     dtype=tf.int32)
+  def _dense_pose_surface_coordinates(self, keys_to_tensors):
+    """Creates a tensor that contains surface coords for each DensePose point.
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors.
+    Returns:
+      A 3-D float32 tensor of shape [num_instances, num_points, 4] where each
+      point contains (y, x, v, u) data for each sampled DensePose point. The
+      (y, x) coordinate has normalized image locations for the point, and (v, u)
+      contains the surface coordinate (also normalized) for the part. The value
+      `num_points` corresponds to the maximum number of sampled points across
+      all instances in the image. Note that instances with fewer sampled points
+      will be padded with zeros in dim=1.
+    """
+    num_points_per_instances = keys_to_tensors['image/object/densepose/num']
+    dp_y = keys_to_tensors['image/object/densepose/y']
+    dp_x = keys_to_tensors['image/object/densepose/x']
+    dp_v = keys_to_tensors['image/object/densepose/v']
+    dp_u = keys_to_tensors['image/object/densepose/u']
+    if isinstance(num_points_per_instances, tf.SparseTensor):
+      num_points_per_instances = tf.sparse_tensor_to_dense(
+          num_points_per_instances)
+    if isinstance(dp_y, tf.SparseTensor):
+      dp_y = tf.sparse_tensor_to_dense(dp_y)
+    if isinstance(dp_x, tf.SparseTensor):
+      dp_x = tf.sparse_tensor_to_dense(dp_x)
+    if isinstance(dp_v, tf.SparseTensor):
+      dp_v = tf.sparse_tensor_to_dense(dp_v)
+    if isinstance(dp_u, tf.SparseTensor):
+      dp_u = tf.sparse_tensor_to_dense(dp_u)
+    max_points_per_instance = tf.cast(
+        tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
+    num_points_cumulative = tf.concat([
+        [0], tf.math.cumsum(num_points_per_instances)], axis=0)
+    def pad_surface_coordinates_tensor(instance_ind):
+      """Pads DensePose surface coordinates for each instance."""
+      points_range_start = num_points_cumulative[instance_ind]
+      points_range_end = num_points_cumulative[instance_ind + 1]
+      y = dp_y[points_range_start:points_range_end]
+      x = dp_x[points_range_start:points_range_end]
+      v = dp_v[points_range_start:points_range_end]
+      u = dp_u[points_range_start:points_range_end]
+      # Create [num_points_i, 4] tensor, where num_points_i is the number of
+      # sampled points for instance i.
+      unpadded_tensor = tf.stack([y, x, v, u], axis=1)
+      return shape_utils.pad_or_clip_nd(
+          unpadded_tensor, output_shape=[max_points_per_instance, 4])
+    return tf.map_fn(pad_surface_coordinates_tensor,
+                     tf.range(tf.size(num_points_per_instances)),
+                     dtype=tf.float32)
  def _expand_image_label_hierarchy(self, image_classes, image_confidences):
    """Expand image level labels according to the hierarchy.

--- a/research/object_detection/data_decoders/tf_example_decoder_test.py
+++ b/research/object_detection/data_decoders/tf_example_decoder_test.py
--- a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py
+++ b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples.py
--- a/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py
+++ b/research/object_detection/dataset_tools/context_rcnn/add_context_to_examples_tf1_test.py
--- a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py
+++ b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_main.py
--- a/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py
+++ b/research/object_detection/dataset_tools/context_rcnn/create_cococameratraps_tfexample_tf1_test.py
--- a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data.py
--- a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
--- a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data.py
--- a/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py
+++ b/research/object_detection/dataset_tools/context_rcnn/generate_embedding_data_tf1_test.py
--- a/research/object_detection/dataset_tools/create_coco_tf_record.py
+++ b/research/object_detection/dataset_tools/create_coco_tf_record.py
--- a/research/object_detection/dataset_tools/create_coco_tf_record_test.py
+++ b/research/object_detection/dataset_tools/create_coco_tf_record_test.py
--- a/research/object_detection/dataset_tools/densepose/UV_symmetry_transforms.mat
+++ b/research/object_detection/dataset_tools/densepose/UV_symmetry_transforms.mat
--- a/research/object_detection/dataset_tools/seq_example_util_test.py
+++ b/research/object_detection/dataset_tools/seq_example_util_test.py
@@ -288,7 +288,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
        [0.75, 1.],
        seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
    self.assertAllEqual(
-        ['cat', 'frog'],
+        [b'cat', b'frog'],
        seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
    self.assertAllClose(
        [0.],
@@ -332,7 +332,7 @@ class SeqExampleUtilTest(tf.test.TestCase):
        [0.75],
        seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
    self.assertAllEqual(
-        ['cat'],
+        [b'cat'],
        seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
    self.assertAllClose(
        [],

--- a/research/object_detection/dataset_tools/tf_record_creation_util_test.py
+++ b/research/object_detection/dataset_tools/tf_record_creation_util_test.py
@@ -42,7 +42,7 @@ class OpenOutputTfrecordsTests(tf.test.TestCase):
      tf_record_path = '{}-{:05d}-of-00010'.format(
          os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
      records = list(tf.python_io.tf_record_iterator(tf_record_path))
-      self.assertAllEqual(records, ['test_{}'.format(idx)])
+      self.assertAllEqual(records, ['test_{}'.format(idx).encode('utf-8')])
 if __name__ == '__main__':

--- a/research/object_detection/dockerfiles/1.15/Dockerfile
+++ b/research/object_detection/dockerfiles/1.15/Dockerfile
--- a/research/object_detection/dockerfiles/1.15/README.md
+++ b/research/object_detection/dockerfiles/1.15/README.md
--- a/research/object_detection/dockerfiles/2.2/Dockerfile
+++ b/research/object_detection/dockerfiles/2.2/Dockerfile
--- a/research/object_detection/dockerfiles/2.2/README.md
+++ b/research/object_detection/dockerfiles/2.2/README.md