Unverified Commit 36e786dc authored by vivek rathod's avatar vivek rathod Committed by GitHub
Browse files

Merged commit includes the following changes: (#8779)



319539052  by rathodv:

    Internal Change.

--
319537186  by rathodv:

    Internal Change

--
319320800  by jonathanhuang:

    Internal changes.

--
319260368  by ronnyvotel:

    Adding a target assigner for DensePose.

--
319240476  by sbeery:

    Switching to main() for argparse.

--

PiperOrigin-RevId: 319539052
Co-authored-by: default avatarTF Object Detection Team <no-reply@google.com>
parent 523f5e05
......@@ -110,11 +110,11 @@ if tf_version.is_tf2():
frcnn_resnet_keras.FasterRCNNResnet152KerasFeatureExtractor,
'faster_rcnn_inception_resnet_v2_keras':
frcnn_inc_res_keras.FasterRCNNInceptionResnetV2KerasFeatureExtractor,
'fasret_rcnn_resnet50_fpn_keras':
'faster_rcnn_resnet50_fpn_keras':
frcnn_resnet_fpn_keras.FasterRCNNResnet50FpnKerasFeatureExtractor,
'fasret_rcnn_resnet101_fpn_keras':
'faster_rcnn_resnet101_fpn_keras':
frcnn_resnet_fpn_keras.FasterRCNNResnet101FpnKerasFeatureExtractor,
'fasret_rcnn_resnet152_fpn_keras':
'faster_rcnn_resnet152_fpn_keras':
frcnn_resnet_fpn_keras.FasterRCNNResnet152FpnKerasFeatureExtractor,
}
......
......@@ -45,6 +45,7 @@ from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_coder
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import densepose_ops
from object_detection.core import keypoint_ops
from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
......@@ -799,17 +800,15 @@ def get_batch_predictions_from_indices(batch_predictions, indices):
function.
Args:
batch_predictions: A tensor of shape [batch_size, height, width, 2] for
single class offsets and [batch_size, height, width, class, 2] for
multiple classes offsets (e.g. keypoint joint offsets) representing the
(height, width) or (y_offset, x_offset) predictions over a batch.
indices: A tensor of shape [num_instances, 3] for single class offset and
[num_instances, 4] for multiple classes offsets representing the indices
in the batch to be penalized in a loss function
batch_predictions: A tensor of shape [batch_size, height, width, channels]
or [batch_size, height, width, class, channels] for class-specific
features (e.g. keypoint joint offsets).
indices: A tensor of shape [num_instances, 3] for single class features or
[num_instances, 4] for multiple classes features.
Returns:
values: A tensor of shape [num_instances, 2] holding the predicted values
at the given indices.
values: A tensor of shape [num_instances, channels] holding the predicted
values at the given indices.
"""
return tf.gather_nd(batch_predictions, indices)
......@@ -1657,3 +1656,118 @@ class CenterNetMaskTargetAssigner(object):
segmentation_target = tf.stack(segmentation_targets_list, axis=0)
return segmentation_target
class CenterNetDensePoseTargetAssigner(object):
  """Wrapper to compute targets for DensePose task."""

  def __init__(self, stride, num_parts=24):
    """Initializes the DensePose target assigner.

    Args:
      stride: int, stride of the CenterNet output. Groundtruth coordinates
        (given relative to the input image) are mapped into the output space
        by dividing height/width by this stride.
      num_parts: int, number of DensePose body parts; used as the depth of
        the one-hot part-id encoding. Defaults to 24 (the standard DensePose
        part count).
    """
    self._stride = stride
    self._num_parts = num_parts

  def assign_part_and_coordinate_targets(self,
                                         height,
                                         width,
                                         gt_dp_num_points_list,
                                         gt_dp_part_ids_list,
                                         gt_dp_surface_coords_list,
                                         gt_weights_list=None):
    """Returns the DensePose part_id and coordinate targets and their indices.

    The returned values are expected to be used with predicted tensors
    of size (batch_size, height//self._stride, width//self._stride, 2). The
    predicted values at the relevant indices can be retrieved with the
    get_batch_predictions_from_indices function.

    Args:
      height: int, height of input to the model. This is used to determine the
        height of the output.
      width: int, width of the input to the model. This is used to determine
        the width of the output.
      gt_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
        [num_boxes] containing the number of DensePose sampled points per box.
      gt_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
        [num_boxes, max_sampled_points] containing the DensePose part ids
        (0-indexed) for each sampled point. Note that there may be padding, as
        boxes may contain a different number of sampled points.
      gt_dp_surface_coords_list: a list of 3-D tf.float32 tensors of shape
        [num_boxes, max_sampled_points, 4] containing the DensePose surface
        coordinates (normalized) for each sampled point. Note that there may
        be padding.
      gt_weights_list: A list of 1-D tensors with shape [num_boxes]
        corresponding to the weight of each groundtruth detection box.

    Returns:
      batch_indices: an integer tensor of shape [num_total_points, 4] holding
        the indices inside the predicted tensor which should be penalized. The
        first column indicates the index along the batch dimension and the
        second and third columns indicate the index along the y and x
        dimensions respectively. The fourth column is the part index.
      batch_part_ids: an int tensor of shape [num_total_points, num_parts]
        holding 1-hot encodings of parts for each sampled point.
      batch_surface_coords: a float tensor of shape [num_total_points, 2]
        holding the expected (v, u) coordinates for each sampled point.
      batch_weights: a float tensor of shape [num_total_points] indicating the
        weight of each prediction.
      Note that num_total_points = batch_size * num_boxes * max_sampled_points.
    """
    if gt_weights_list is None:
      # No weights provided for any example; per-box ones are substituted
      # inside the loop below.
      gt_weights_list = [None] * len(gt_dp_num_points_list)

    batch_indices = []
    batch_part_ids = []
    batch_surface_coords = []
    batch_weights = []
    # Each list element corresponds to one example in the batch; the loop
    # index `i` therefore becomes the batch index in the output indices.
    for i, (num_points, part_ids, surface_coords, weights) in enumerate(
        zip(gt_dp_num_points_list, gt_dp_part_ids_list,
            gt_dp_surface_coords_list, gt_weights_list)):
      num_boxes, max_sampled_points = (
          shape_utils.combined_static_and_dynamic_shape(part_ids))
      # Flatten boxes x points into one axis so all per-point outputs align.
      part_ids_flattened = tf.reshape(part_ids, [-1])
      part_ids_one_hot = tf.one_hot(part_ids_flattened, depth=self._num_parts)
      # Get DensePose coordinates in the output space.
      surface_coords_abs = densepose_ops.to_absolute_coordinates(
          surface_coords, height // self._stride, width // self._stride)
      surface_coords_abs = tf.reshape(surface_coords_abs, [-1, 4])
      # Each tensor has shape [num_boxes * max_sampled_points].
      # (yabs, xabs) are absolute output-space locations; (v, u) are the
      # surface-coordinate regression targets.
      yabs, xabs, v, u = tf.unstack(surface_coords_abs, axis=-1)
      # Get the indices (in output space) for the DensePose coordinates. Note
      # that if self._stride is larger than 1, this will have the effect of
      # reducing spatial resolution of the groundtruth points.
      indices_y = tf.cast(yabs, tf.int32)
      indices_x = tf.cast(xabs, tf.int32)
      # Assign ones if weights are not provided.
      if weights is None:
        weights = tf.ones(num_boxes, dtype=tf.float32)
      # Create per-point weights by broadcasting each box weight to all of
      # that box's sampled points.
      weights_per_point = tf.reshape(
          tf.tile(weights[:, tf.newaxis], multiples=[1, max_sampled_points]),
          shape=[-1])
      # Mask out invalid (i.e. padded) DensePose points: a point at position
      # j within a box is valid only when j < num_points for that box.
      num_points_tiled = tf.tile(num_points[:, tf.newaxis],
                                 multiples=[1, max_sampled_points])
      range_tiled = tf.tile(tf.range(max_sampled_points)[tf.newaxis, :],
                            multiples=[num_boxes, 1])
      valid_points = tf.math.less(range_tiled, num_points_tiled)
      valid_points = tf.cast(tf.reshape(valid_points, [-1]), dtype=tf.float32)
      weights_per_point = weights_per_point * valid_points
      # Shape of [num_boxes * max_sampled_points] integer tensor filled with
      # current batch index.
      batch_index = i * tf.ones_like(indices_y, dtype=tf.int32)
      batch_indices.append(
          tf.stack([batch_index, indices_y, indices_x, part_ids_flattened],
                   axis=1))
      batch_part_ids.append(part_ids_one_hot)
      batch_surface_coords.append(tf.stack([v, u], axis=1))
      batch_weights.append(weights_per_point)

    # Concatenate per-example results along the point dimension.
    batch_indices = tf.concat(batch_indices, axis=0)
    batch_part_ids = tf.concat(batch_part_ids, axis=0)
    batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
    batch_weights = tf.concat(batch_weights, axis=0)
    return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
......@@ -1906,6 +1906,99 @@ class CenterNetMaskTargetAssignerTest(test_case.TestCase):
expected_seg_target, segmentation_target)
class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):
  """Tests for CenterNetDensePoseTargetAssigner."""

  def test_assign_part_and_coordinate_targets(self):
    """Checks indices, one-hot part ids, (v, u) coords and weights.

    Uses a batch of two examples with padded DensePose points (via
    gt_dp_num_points_list) and per-box weights, including a zero weight.
    """
    def graph_fn():
      # Number of valid (non-padded) DensePose points per box.
      gt_dp_num_points_list = [
          # Example 0.
          tf.constant([2, 0, 3], dtype=tf.int32),
          # Example 1.
          tf.constant([1, 1], dtype=tf.int32),
      ]
      gt_dp_part_ids_list = [
          # Example 0.
          tf.constant([[1, 6, 0],
                       [0, 0, 0],
                       [0, 2, 3]], dtype=tf.int32),
          # Example 1.
          tf.constant([[7, 0, 0],
                       [0, 0, 0]], dtype=tf.int32),
      ]
      # Each point is (y, x, v, u) with y, x normalized to the input image.
      gt_dp_surface_coords_list = [
          # Example 0.
          tf.constant(
              [[[0.11, 0.2, 0.3, 0.4],  # Box 0.
                [0.6, 0.4, 0.1, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.0, 0.0, 0.0, 0.0],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.22, 0.1, 0.6, 0.8],  # Box 2.
                [0.0, 0.4, 0.5, 1.0],
                [0.3, 0.2, 0.4, 0.1]]],
              dtype=tf.float32),
          # Example 1.
          tf.constant(
              [[[0.5, 0.5, 0.3, 1.0],  # Box 0.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]],
               [[0.2, 0.2, 0.5, 0.8],  # Box 1.
                [0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0]]],
              dtype=tf.float32),
      ]
      gt_weights_list = [
          # Example 0.
          tf.constant([1.0, 1.0, 0.5], dtype=tf.float32),
          # Example 1.
          tf.constant([0.0, 1.0], dtype=tf.float32),
      ]
      cn_assigner = targetassigner.CenterNetDensePoseTargetAssigner(stride=4)
      batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
          cn_assigner.assign_part_and_coordinate_targets(
              height=120,
              width=80,
              gt_dp_num_points_list=gt_dp_num_points_list,
              gt_dp_part_ids_list=gt_dp_part_ids_list,
              gt_dp_surface_coords_list=gt_dp_surface_coords_list,
              gt_weights_list=gt_weights_list))
      return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
    batch_indices, batch_part_ids, batch_surface_coords, batch_weights = (
        self.execute(graph_fn, []))

    # Each row is [batch_index, y, x, part_id] in the stride-4 output space.
    expected_batch_indices = np.array([
        # Example 0. e.g.
        # The first set of indices is calculated as follows:
        # floor(0.11*120/4) = 3, floor(0.2*80/4) = 4.
        [0, 3, 4, 1], [0, 18, 8, 6], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
        [0, 0, 0, 0], [0, 6, 2, 0], [0, 0, 8, 2], [0, 9, 4, 3],
        # Example 1.
        [1, 15, 10, 7], [1, 0, 0, 0], [1, 0, 0, 0], [1, 6, 4, 0], [1, 0, 0, 0],
        [1, 0, 0, 0]
    ], dtype=np.int32)
    expected_batch_part_ids = tf.one_hot(
        [1, 6, 0, 0, 0, 0, 0, 2, 3, 7, 0, 0, 0, 0, 0], depth=24).numpy()
    expected_batch_surface_coords = np.array([
        # Box 0.
        [0.3, 0.4], [0.1, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0],
        [0.6, 0.8], [0.5, 1.0], [0.4, 0.1],
        # Box 1.
        [0.3, 1.0], [0.0, 0.0], [0.0, 0.0], [0.5, 0.8], [0.0, 0.0], [0.0, 0.0],
    ], np.float32)
    # Padded points get weight 0; box 2 in example 0 carries its 0.5 weight,
    # and box 0 in example 1 was explicitly given weight 0.
    expected_batch_weights = np.array([
        # Box 0.
        1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5,
        # Box 1.
        0.0, 0.0, 0.0, 1.0, 0.0, 0.0
    ], dtype=np.float32)
    self.assertAllEqual(expected_batch_indices, batch_indices)
    self.assertAllEqual(expected_batch_part_ids, batch_part_ids)
    self.assertAllClose(expected_batch_surface_coords, batch_surface_coords)
    self.assertAllClose(expected_batch_weights, batch_weights)
if __name__ == '__main__':
  # Eager/TF2 behavior is required because the tests call .numpy() on tensors.
  tf.enable_v2_behavior()
  tf.test.main()
......@@ -51,7 +51,6 @@ import itertools
import json
import os
from absl import app
import apache_beam as beam
import numpy as np
import PIL.Image
......@@ -932,4 +931,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
app.run(main)
main()
......@@ -39,7 +39,6 @@ import io
import json
import logging
import os
from absl import app
import apache_beam as beam
import numpy as np
import PIL.Image
......@@ -338,4 +337,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
app.run(main)
main()
......@@ -48,7 +48,6 @@ from __future__ import print_function
import argparse
import os
import threading
from absl import app
import apache_beam as beam
import tensorflow.compat.v1 as tf
......@@ -290,4 +289,4 @@ def main(argv=None, save_main_session=True):
if __name__ == '__main__':
app.run(main)
main()
......@@ -52,8 +52,6 @@ import datetime
import os
import threading
from absl import app
import apache_beam as beam
import numpy as np
import six
......@@ -410,5 +408,7 @@ def main(argv=None, save_main_session=True):
p.run()
if __name__ == '__main__':
app.run(main)
main()
......@@ -2547,7 +2547,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
if second_stage_mask_loss is not None:
mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
second_stage_mask_loss, name='mask_loss')
loss_dict[mask_loss.op.name] = mask_loss
loss_dict['Loss/BoxClassifierLoss/mask_loss'] = mask_loss
return loss_dict
def _get_mask_proposal_boxes_and_classes(
......
......@@ -56,7 +56,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
the resnet_v1.resnet_v1_{50,101,152} models.
resnet_v1_base_model_name: model name under which to construct resnet v1.
first_stage_features_stride: See base class.
conv_hyperparameters: a `hyperparams_builder.KerasLayerHyperparams` object
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
batch_norm_trainable: See base class.
......@@ -143,19 +143,21 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
with tf.name_scope('ResnetV1FPN'):
full_resnet_v1_model = self._resnet_v1_base_model(
batchnorm_training=self._train_batch_norm,
conv_hyperparams=(self._conv_hyperparams
if self._override_base_feature_extractor_hyperparams
conv_hyperparams=(self._conv_hyperparams if
self._override_base_feature_extractor_hyperparams
else None),
classes=None,
weights=None,
include_top=False)
output_layers = _RESNET_MODEL_OUTPUT_LAYERS[self._resnet_v1_base_model_name]
output_layers = _RESNET_MODEL_OUTPUT_LAYERS[
self._resnet_v1_base_model_name]
outputs = [full_resnet_v1_model.get_layer(output_layer_name).output
for output_layer_name in output_layers]
self.classification_backbone = tf.keras.Model(
inputs=full_resnet_v1_model.inputs,
outputs=outputs)
backbone_outputs = self.classification_backbone(full_resnet_v1_model.inputs)
backbone_outputs = self.classification_backbone(
full_resnet_v1_model.inputs)
# construct FPN feature generator
self._base_fpn_max_level = min(self._fpn_max_level, 5)
......@@ -236,7 +238,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
"""
with tf.name_scope(name):
with tf.name_scope('ResnetV1FPN'):
# TODO: Add a batchnorm layer between two fc layers.
# TODO(yiming): Add a batchnorm layer between two fc layers.
feature_extractor_model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(units=1024, activation='relu'),
......@@ -283,12 +285,15 @@ class FasterRCNNResnet50FpnKerasFeatureExtractor(
fpn_min_level=fpn_min_level,
fpn_max_level=fpn_max_level,
additional_layer_depth=additional_layer_depth,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams
)
class FasterRCNNResnet101FpnKerasFeatureExtractor(
FasterRCNNResnetV1FpnKerasFeatureExtractor):
"""Faster RCNN with Resnet101 FPN feature extractor."""
def __init__(self,
is_training,
first_stage_features_stride=16,
......@@ -323,7 +328,8 @@ class FasterRCNNResnet101FpnKerasFeatureExtractor(
fpn_min_level=fpn_min_level,
fpn_max_level=fpn_max_level,
additional_layer_depth=additional_layer_depth,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
class FasterRCNNResnet152FpnKerasFeatureExtractor(
......@@ -364,4 +370,5 @@ class FasterRCNNResnet152FpnKerasFeatureExtractor(
fpn_min_level=fpn_min_level,
fpn_max_level=fpn_max_level,
additional_layer_depth=additional_layer_depth,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
......@@ -21,8 +21,8 @@ from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import faster_rcnn_resnet_v1_fpn_keras_feature_extractor as frcnn_res_fpn
from object_detection.utils import tf_version
from object_detection.protos import hyperparams_pb2
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
......@@ -40,7 +40,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractorTest(tf.test.TestCase):
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _build_feature_extractor(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment