Unverified Commit 5ffcc5b6 authored by Anirudh Vegesana's avatar Anirudh Vegesana Committed by GitHub
Browse files

Merge branch 'purdue-yolo' into detection_generator_pr

parents 0b81a843 76e0c014
......@@ -448,9 +448,8 @@ def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
tf.keras.backend.set_learning_phase(is_training)
# Set policy for mixed-precision training with Keras-based models.
if use_tpu and train_config.use_bfloat16:
from tensorflow.python.keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top
# Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
base_layer_utils.enable_v2_dtype_behavior()
tf.keras.layers.enable_v2_dtype_behavior()
tf2.keras.mixed_precision.set_global_policy('mixed_bfloat16')
detection_model = detection_model_fn(
is_training=is_training, add_summaries=(not use_tpu))
......
......@@ -125,14 +125,8 @@ class CenterNetMobileNetV2FPNFeatureExtractor(
self._base_model.load_weights(path)
@property
def supported_sub_model_types(self):
  """The sub-model types this feature extractor can hand out."""
  supported = ['classification']
  return supported
def get_sub_model(self, sub_model_type):
  """Returns the requested sub-model.

  Args:
    sub_model_type: A string identifying the sub model. Only
      'classification' is supported by this feature extractor.

  Returns:
    The classification backbone model.

  Raises:
    ValueError: If `sub_model_type` is not 'classification'.
  """
  if sub_model_type == 'classification':
    return self._base_model
  # Bug fix: the original constructed the ValueError but never raised it,
  # so unsupported types silently returned None.
  raise ValueError('Sub model type "{}" not supported.'.format(sub_model_type))
def classification_backbone(self):
  """Returns the underlying classification backbone model."""
  backbone = self._base_model
  return backbone
def call(self, inputs):
  """Runs the feature extractor model and wraps its output in a list."""
  features = self._feature_extractor_model(inputs)
  return [features]
......
"""Layer for Non-Local operation.
This is a building block which mimics self-attention in a feature map.
For more information, please see https://arxiv.org/pdf/1711.07971.pdf
"""
import tensorflow as tf
from object_detection.utils import shape_utils
class NonLocalBlock(tf.keras.layers.Layer):
  """A Non-local block.

  Implements the self-attention style operation from
  https://arxiv.org/pdf/1711.07971.pdf as a residual block: the weighted
  aggregation of values is projected back to the input channel count and
  added to the input.
  """

  def __init__(self, bottleneck_channels, pairwise_fn='dot', pool_size=None,
               add_coord_conv=False):
    """Constructor.

    Args:
      bottleneck_channels: The number of channels used to do pairwise
        comparisons at each feature location.
      pairwise_fn: The pairwise comparison function. Currently supports
        'dot' and 'embedded_softmax'.
      pool_size: The downsample size (achieved with max pool) used prior to
        doing pairwise comparisons. This does not affect the shape of the
        output tensor, but reduces computation. For a pool_size of 2,
        computation is dropped by a factor of 4. If None, no downsampling is
        performed.
      add_coord_conv: Concatenates a 2-channel feature map with normalized
        coordinates (in range [-1, 1]) to the input, prior to the
        non-local block.

    Raises:
      RuntimeError: If self._pairwise_fn is not one of "dot" or
        "embedded_softmax".
    """
    super().__init__()
    self._bottleneck_channels = bottleneck_channels
    self._add_coord_conv = add_coord_conv
    self._pool_size = pool_size
    if pairwise_fn not in ('dot', 'embedded_softmax'):
      raise RuntimeError('pairwise_fn must be one of "dot" or '
                         '"embedded_softmax"')
    self._pairwise_fn = pairwise_fn

  def build(self, input_shape):
    channels = input_shape[-1]
    # 1x1 convolutions that project the input into the bottleneck space for
    # the query/key/value triplet of the attention computation.
    self.queries_conv = tf.keras.layers.Conv2D(
        filters=self._bottleneck_channels, kernel_size=1, use_bias=False,
        strides=1, padding='same')
    self.keys_conv = tf.keras.layers.Conv2D(
        filters=self._bottleneck_channels, kernel_size=1, use_bias=False,
        strides=1, padding='same')
    self.values_conv = tf.keras.layers.Conv2D(
        filters=self._bottleneck_channels, kernel_size=1, use_bias=False,
        strides=1, padding='same')
    # Projects the aggregated values back to the input channel count so the
    # residual addition in call() is well-defined.
    self.expand_conv = tf.keras.layers.Conv2D(
        filters=channels, kernel_size=1, use_bias=False, strides=1,
        padding='same')
    # gamma_initializer='zeros' makes the residual branch contribute nothing
    # at the start of training, so the block is initially an identity map.
    self.batchnorm = tf.keras.layers.BatchNormalization(
        name='batchnorm', epsilon=1e-5, momentum=0.1, fused=True,
        beta_initializer='zeros', gamma_initializer='zeros')
    if self._pool_size:
      self.maxpool_keys = tf.keras.layers.MaxPool2D(
          pool_size=(self._pool_size, self._pool_size))
      self.maxpool_values = tf.keras.layers.MaxPool2D(
          pool_size=(self._pool_size, self._pool_size))

  def call(self, inputs):
    """Applies a non-local block to an input feature map.

    Args:
      inputs: A [batch, height, width, channels] float32 input tensor.

    Returns:
      An output tensor of the same shape as the input.
    """
    batch, height, width, _ = shape_utils.combined_static_and_dynamic_shape(
        inputs)
    x = inputs
    if self._add_coord_conv:
      # Bug fix: use 'ij' indexing so both coordinate grids have shape
      # [height, width] and align with the feature map. The previous
      # default 'xy' meshgrid produced [width, height] grids, which only
      # worked (by coincidence) for square inputs. Values are identical to
      # the old behavior when height == width.
      coords_y, coords_x = tf.meshgrid(
          tf.linspace(-1., 1., height), tf.linspace(-1., 1., width),
          indexing='ij')
      coords = tf.stack([coords_y, coords_x], axis=-1)
      coords = tf.tile(coords[tf.newaxis, :, :, :],
                       multiples=[batch, 1, 1, 1])
      x = tf.concat([x, coords], axis=-1)
    # shape: [B, H, W, bottleneck_channels].
    queries = self.queries_conv(x)
    # shape: [B, H, W, bottleneck_channels].
    keys = self.keys_conv(x)
    # shape: [B, H, W, bottleneck_channels].
    values = self.values_conv(x)
    keys_height, keys_width = height, width
    if self._pool_size:
      keys_height = height // self._pool_size
      keys_width = width // self._pool_size
      # shape: [B, H', W', bottleneck_channels].
      keys = self.maxpool_keys(keys)
      values = self.maxpool_values(values)
    # Produce pairwise scores.
    queries = tf.reshape(
        queries, [batch, height * width, self._bottleneck_channels])
    keys = tf.reshape(
        keys, [batch, keys_height * keys_width, self._bottleneck_channels])
    # shape = [B, H*W, H'*W'].
    scores = tf.linalg.matmul(queries, keys, transpose_b=True)
    if self._pairwise_fn == 'dot':
      normalization = tf.cast(height * width, dtype=tf.float32)
      scores = (1./normalization) * scores
    elif self._pairwise_fn == 'embedded_softmax':
      scores = tf.nn.softmax(scores, axis=-1)
    # Multiply scores with values.
    # shape = [B, H'*W', bottleneck_channels].
    values = tf.reshape(
        values, [batch, keys_height * keys_width, self._bottleneck_channels])
    # shape = [B, H, W, bottleneck_channels].
    weighted_values = tf.linalg.matmul(scores, values)
    weighted_values = tf.reshape(
        weighted_values, [batch, height, width, self._bottleneck_channels])
    # Construct residual.
    expand = self.batchnorm(self.expand_conv(weighted_values))
    output = expand + inputs
    return output
"""Tests for google3.third_party.tensorflow_models.object_detection.models.keras_models.nonlocal_block."""
import unittest
from absl.testing import parameterized
import tensorflow as tf
from object_detection.models.keras_models import nonlocal_block
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class NonlocalTest(test_case.TestCase, parameterized.TestCase):
  """Smoke tests for NonLocalBlock across its configuration options."""

  @parameterized.parameters(
      [{'pool_size': None, 'add_coord_conv': False},
       {'pool_size': None, 'add_coord_conv': True},
       {'pool_size': 2, 'add_coord_conv': False},
       {'pool_size': 2, 'add_coord_conv': True}])
  def test_run_nonlocal_block(self, pool_size, add_coord_conv):
    block_layer = nonlocal_block.NonLocalBlock(
        8, pool_size=pool_size, add_coord_conv=add_coord_conv)

    def _forward():
      zeros = tf.zeros((4, 16, 16, 32), dtype=tf.float32)
      return block_layer(zeros)

    result = self.execute(_forward, [])
    # The non-local block must preserve the input shape.
    self.assertAllEqual([4, 16, 16, 32], result.shape)
# Standard test entry point; tf.test.main() discovers and runs the cases above.
if __name__ == '__main__':
  tf.test.main()
......@@ -19,7 +19,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from keras.applications import resnet
from tensorflow.python.keras.applications import resnet
import tensorflow.compat.v1 as tf
......
......@@ -21,7 +21,11 @@ REQUIRED_PACKAGES = [
'lvis',
'scipy',
'pandas',
'tf-models-official'
# tensorflow 2.5.0 requires grpcio~=1.34.0.
# tf-models-official (which requires google-cloud-bigquery) ends
# up installing the latest grpcio which causes problems later.
'google-cloud-bigquery==1.21.0',
'tf-models-official',
]
setup(
......
......@@ -446,6 +446,10 @@ message CenterNet {
// The mode for jittering box ROIs. See RandomJitterBoxes in
// preprocessor.proto for more details
optional RandomJitterBoxes.JitterMode jitter_mode = 15 [default=DEFAULT];
// Weight for the box consistency loss as described in the BoxInst paper
// https://arxiv.org/abs/2012.02310
optional float box_consistency_loss_weight = 16 [default=0.0];
}
optional DeepMACMaskEstimation deepmac_mask_estimation = 14;
......
......@@ -639,6 +639,12 @@ def _maybe_update_config_with_key_value(configs, key, value):
_update_rescore_instances(configs["model"], value)
elif field_name == "unmatched_keypoint_score":
_update_unmatched_keypoint_score(configs["model"], value)
elif field_name == "score_distance_multiplier":
_update_score_distance_multiplier(configs["model"], value)
elif field_name == "std_dev_multiplier":
_update_std_dev_multiplier(configs["model"], value)
elif field_name == "rescoring_threshold":
_update_rescoring_threshold(configs["model"], value)
else:
return False
return True
......@@ -1135,10 +1141,12 @@ def _update_candidate_search_scale(model_config, search_scale):
def _update_candidate_ranking_mode(model_config, mode):
"""Updates how keypoints are snapped to candidates in CenterNet."""
if mode not in ("min_distance", "score_distance_ratio"):
if mode not in ("min_distance", "score_distance_ratio",
"score_scaled_distance_ratio", "gaussian_weighted"):
raise ValueError("Attempting to set the keypoint candidate ranking mode "
"to {}, but the only options are 'min_distance' and "
"'score_distance_ratio'.".format(mode))
"to {}, but the only options are 'min_distance', "
"'score_distance_ratio', 'score_scaled_distance_ratio', "
"'gaussian_weighted'.".format(mode))
meta_architecture = model_config.WhichOneof("model")
if meta_architecture == "center_net":
if len(model_config.center_net.keypoint_estimation_task) == 1:
......@@ -1214,3 +1222,50 @@ def _update_unmatched_keypoint_score(model_config, score):
"unmatched_keypoint_score since there are multiple "
"keypoint estimation tasks")
def _update_score_distance_multiplier(model_config, score_distance_multiplier):
  """Sets score_distance_multiplier on the keypoint task. See CenterNet proto.

  Args:
    model_config: A model.proto message holding the model configuration.
    score_distance_multiplier: Float written into the single keypoint
      estimation task; ignored (with a warning) when multiple tasks exist.

  Raises:
    ValueError: If the configured meta-architecture is not center_net.
  """
  meta_architecture = model_config.WhichOneof("model")
  if meta_architecture != "center_net":
    raise ValueError(
        "Unsupported meta_architecture type: %s" % meta_architecture)
  tasks = model_config.center_net.keypoint_estimation_task
  if len(tasks) == 1:
    tasks[0].score_distance_multiplier = score_distance_multiplier
  else:
    tf.logging.warning("Ignoring config override key for "
                       "score_distance_multiplier since there are multiple "
                       "keypoint estimation tasks")
def _update_std_dev_multiplier(model_config, std_dev_multiplier):
  """Sets std_dev_multiplier on the keypoint task. See CenterNet proto.

  Args:
    model_config: A model.proto message holding the model configuration.
    std_dev_multiplier: Float written into the single keypoint estimation
      task; ignored (with a warning) when multiple tasks exist.

  Raises:
    ValueError: If the configured meta-architecture is not center_net.
  """
  meta_architecture = model_config.WhichOneof("model")
  if meta_architecture != "center_net":
    raise ValueError(
        "Unsupported meta_architecture type: %s" % meta_architecture)
  tasks = model_config.center_net.keypoint_estimation_task
  if len(tasks) == 1:
    tasks[0].std_dev_multiplier = std_dev_multiplier
  else:
    tf.logging.warning("Ignoring config override key for "
                       "std_dev_multiplier since there are multiple "
                       "keypoint estimation tasks")
def _update_rescoring_threshold(model_config, rescoring_threshold):
  """Sets rescoring_threshold on the keypoint task. See CenterNet proto.

  Args:
    model_config: A model.proto message holding the model configuration.
    rescoring_threshold: Float written into the single keypoint estimation
      task; ignored (with a warning) when multiple tasks exist.

  Raises:
    ValueError: If the configured meta-architecture is not center_net.
  """
  meta_architecture = model_config.WhichOneof("model")
  if meta_architecture != "center_net":
    raise ValueError(
        "Unsupported meta_architecture type: %s" % meta_architecture)
  tasks = model_config.center_net.keypoint_estimation_task
  if len(tasks) == 1:
    tasks[0].rescoring_threshold = rescoring_threshold
  else:
    tf.logging.warning("Ignoring config override key for "
                       "rescoring_threshold since there are multiple "
                       "keypoint estimation tasks")
......@@ -70,7 +70,7 @@ def iou(boxes1, boxes2):
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
......@@ -92,7 +92,7 @@ def ioa(boxes1, boxes2):
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
......
......@@ -16,6 +16,8 @@
import tensorflow.compat.v1 as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.utils import shape_utils
......@@ -290,7 +292,8 @@ def get_valid_keypoint_mask_for_class(keypoint_coordinates,
def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
weights=None):
weights=None,
boxes_scale=1.0):
"""Apply weights at pixel locations.
This function is used to generate the pixel weight mask (usually in the output
......@@ -332,6 +335,10 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
a value to apply in each box region. Note that if blackout=True for a
given box, the weight will be zero. If None, all weights are assumed to be
1.
boxes_scale: The amount to scale the height/width of the boxes before
constructing the blackout regions. This is often useful to guarantee that
the proper weight fully covers the object boxes/masks during supervision,
as shifting might occur during image resizing, network stride, etc.
Returns:
A float tensor with shape [height, width] where all values within the
......@@ -347,6 +354,10 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
(y_grid, x_grid) = image_shape_to_grids(height, width)
y_grid = tf.expand_dims(y_grid, axis=0)
x_grid = tf.expand_dims(x_grid, axis=0)
boxlist = box_list.BoxList(boxes)
boxlist = box_list_ops.scale_height_width(
boxlist, y_scale=boxes_scale, x_scale=boxes_scale)
boxes = boxlist.get()
y_min = tf.expand_dims(boxes[:, 0:1], axis=-1)
x_min = tf.expand_dims(boxes[:, 1:2], axis=-1)
y_max = tf.expand_dims(boxes[:, 2:3], axis=-1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment