Merge pull request #8749 from kmindspark:context_tf2

PiperOrigin-RevId: 322801804

Merge pull request #8749 from kmindspark:context_tf2
PiperOrigin-RevId: 322801804
2ae9c3a6 · TF Object Detection Team · 5af2c9d4 · b4aa41f5 · 2ae9c3a6 · 2ae9c3a6
Commit 2ae9c3a6 authored Jul 23, 2020 by TF Object Detection Team
5 changed files
--- a/research/object_detection/meta_architectures/context_rcnn_lib_tf2.py
+++ b/research/object_detection/meta_architectures/context_rcnn_lib_tf2.py
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Library functions for Context R-CNN."""
+import tensorflow as tf
+
+from object_detection.core import freezable_batch_norm
+
+# The negative value used in padding the invalid weights.
+_NEGATIVE_PADDING_VALUE = -100000
+
+
+class ContextProjection(tf.keras.layers.Layer):
+  """Custom layer to do batch normalization followed by a dense projection.
+
+  Inputs are first passed through a `FreezableBatchNorm` layer and the
+  normalized result is then fed to a `Dense` layer with `projection_dimension`
+  units, a bias term and a relu6 activation.
+  """
+
+  def __init__(self, projection_dimension, **kwargs):
+    """Constructs the projection layer.
+
+    Args:
+      projection_dimension: Number of units of the dense projection, i.e. the
+        size of the last dimension of the output.
+      **kwargs: Additional keyword arguments forwarded to the Keras `Layer`
+        constructor.
+    """
+    # Initialize the Keras base class first so that attribute tracking is set
+    # up before any sub-layer is attached to this layer.
+    super(ContextProjection, self).__init__(**kwargs)
+    self.batch_norm = freezable_batch_norm.FreezableBatchNorm(
+        epsilon=0.001,
+        center=True,
+        scale=True,
+        momentum=0.97,
+        trainable=True)
+    self.projection = tf.keras.layers.Dense(units=projection_dimension,
+                                            activation=tf.nn.relu6,
+                                            use_bias=True)
+
+  def build(self, input_shape):
+    """Builds both sub-layers for inputs of shape `input_shape`."""
+    self.batch_norm.build(input_shape)
+    self.projection.build(input_shape)
+
+  def call(self, input_features, is_training=False):
+    """Applies batch normalization and then the dense projection.
+
+    Args:
+      input_features: The features to project.
+      is_training: Forwarded to the batch normalization layer as its second
+        positional argument.
+
+    Returns:
+      The projected features.
+    """
+    return self.projection(self.batch_norm(input_features, is_training))
+
+
+class AttentionBlock(tf.keras.layers.Layer):
+  """Custom layer to perform all attention.
+
+  Box features act as queries and context features provide the keys and
+  values. The attended context is projected to `output_dimension` channels and
+  returned with two singleton spatial dimensions.
+  """
+
+  def __init__(self, bottleneck_dimension, attention_temperature,
+               output_dimension=None, is_training=False,
+               name='AttentionBlock', **kwargs):
+    """Constructs an attention block.
+
+    Args:
+      bottleneck_dimension: An int giving the number of units used by the
+        query, key and value projections.
+      attention_temperature: A float. It controls the temperature of the
+        softmax for weights calculation. The formula for calculation as follows:
+          weights = exp(weights / temperature) / sum(exp(weights / temperature))
+      output_dimension: An int giving the last dimension of the output
+        feature. If falsy (e.g. None), it is taken from the last dimension of
+        the box features when the layer is built.
+      is_training: A boolean (affecting batch normalization).
+      name: A string describing what to name the variables in this block.
+      **kwargs: Additional keyword arguments.
+    """
+    # Initialize the Keras base class first so that attribute tracking is set
+    # up before any sub-layer is attached to this layer.
+    super(AttentionBlock, self).__init__(name=name, **kwargs)
+
+    self._key_proj = ContextProjection(bottleneck_dimension)
+    self._val_proj = ContextProjection(bottleneck_dimension)
+    self._query_proj = ContextProjection(bottleneck_dimension)
+    self._feature_proj = None
+    self._attention_temperature = attention_temperature
+    self._bottleneck_dimension = bottleneck_dimension
+    self._is_training = is_training
+    self._output_dimension = output_dimension
+    if self._output_dimension:
+      self._feature_proj = ContextProjection(self._output_dimension)
+
+  def build(self, input_shapes):
+    """Finishes building the attention block.
+
+    Creates the output projection when no output dimension was supplied at
+    construction time, using the channel count of the box features.
+
+    Args:
+      input_shapes: the shape of the primary input box features.
+    """
+    if self._feature_proj is None:
+      self._output_dimension = input_shapes[-1]
+      self._feature_proj = ContextProjection(self._output_dimension)
+
+  def call(self, box_features, context_features, valid_context_size):
+    """Handles a call by performing attention.
+
+    Args:
+      box_features: A float Tensor of shape [batch_size, max_num_proposals,
+        height, width, channels].
+      context_features: A float Tensor of shape [batch_size, context_size,
+        num_context_features].
+      valid_context_size: A int32 Tensor of shape [batch_size].
+
+    Returns:
+      A float Tensor with shape [batch_size, max_num_proposals, 1, 1,
+      output_dimension] containing output features after attention with
+      context features.
+    """
+
+    # The context size is read from the static shape of context_features.
+    _, context_size, _ = context_features.shape
+    valid_mask = compute_valid_mask(valid_context_size, context_size)
+
+    # Average pools over height and width dimension so that the shape of
+    # box_features becomes [batch_size, max_num_proposals, channels].
+    box_features = tf.reduce_mean(box_features, [2, 3])
+
+    queries = project_features(
+        box_features, self._bottleneck_dimension, self._is_training,
+        self._query_proj, normalize=True)
+    keys = project_features(
+        context_features, self._bottleneck_dimension, self._is_training,
+        self._key_proj, normalize=True)
+    values = project_features(
+        context_features, self._bottleneck_dimension, self._is_training,
+        self._val_proj, normalize=True)
+
+    # weights has shape [batch_size, max_num_proposals, context_size].
+    weights = tf.matmul(queries, keys, transpose_b=True)
+    # Padded context entries are pushed to a very negative weight and their
+    # values are zeroed before the softmax is taken.
+    weights, values = filter_weight_value(weights, values, valid_mask)
+    weights = tf.nn.softmax(weights / self._attention_temperature)
+
+    features = tf.matmul(weights, values)
+    output_features = project_features(
+        features, self._output_dimension, self._is_training,
+        self._feature_proj, normalize=False)
+
+    # Re-insert singleton height and width dimensions.
+    output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]
+
+    return output_features
+
+
+def filter_weight_value(weights, values, valid_mask):
+  """Masks out padded context entries in attention weights and values.
+
+  _NEGATIVE_PADDING_VALUE is added to every weight that refers to an invalid
+  context element so that it does not contribute to a subsequent softmax, and
+  the values of invalid context elements are set to 0.
+
+  Args:
+    weights: A float Tensor of shape [batch_size, input_size, context_size].
+    values: A float Tensor of shape [batch_size, context_size,
+      projected_dimension].
+    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
+      valid and False means invalid.
+
+  Returns:
+    weights: A float Tensor of shape [batch_size, input_size, context_size].
+    values: A float Tensor of shape [batch_size, context_size,
+      projected_dimension].
+
+  Raises:
+    ValueError: If the batch or context dimensions of the inputs do not match.
+  """
+  weights_batch, _, weights_context = weights.shape
+  values_batch, values_context, _ = values.shape
+  mask_batch, mask_context = valid_mask.shape
+
+  if weights_batch != values_batch or values_batch != mask_batch:
+    raise ValueError(
+        'Please make sure the first dimension of the input tensors are the '
+        'same.')
+
+  if weights_context != values_context:
+    raise ValueError(
+        'Please make sure the third dimension of weights matches the second '
+        'dimension of values.')
+
+  if weights_context != mask_context:
+    raise ValueError(
+        'Please make sure the third dimension of the weights matches the '
+        'second dimension of the valid_mask.')
+
+  # Large negative offset for every invalid context slot, shaped
+  # [batch_size, 1, context_size] so it broadcasts over the input_size axis.
+  invalid_mask = tf.math.logical_not(valid_mask)
+  padding = tf.cast(invalid_mask, weights.dtype) * _NEGATIVE_PADDING_VALUE
+  masked_weights = weights + padding[:, tf.newaxis, :]
+
+  # Zero out the values of invalid context slots; the mask is expanded to
+  # [batch_size, context_size, 1] to broadcast over the feature axis.
+  keep = tf.cast(valid_mask[..., tf.newaxis], values.dtype)
+  masked_values = values * keep
+
+  return masked_weights, masked_values
+
+
+def project_features(features, bottleneck_dimension, is_training,
+                     layer, normalize=True):
+  """Projects features to another feature space.
+
+  The features are flattened to two dimensions, passed through `layer`, and
+  reshaped back to three dimensions.
+
+  Args:
+    features: A float Tensor of shape [batch_size, features_size,
+      num_features].
+    bottleneck_dimension: An int giving the last dimension of the reshaped
+      output; it is expected to match the output size of `layer`.
+    is_training: A boolean (affecting batch normalization); passed to `layer`
+      as its second positional argument.
+    layer: Contains a custom layer specific to the particular operation
+          being performed (key, value, query, features)
+    normalize: A boolean. If true, the output features will be l2
+      normalized on the last dimension.
+
+  Returns:
+    A float Tensor of shape [batch, features_size, projection_dimension].
+  """
+  # Unpacking also enforces that the input is rank 3.
+  batch_size, _, num_features = features.shape
+
+  # The static batch size may be unknown (e.g. inside a traced function with a
+  # variable batch dimension); fall back to the dynamic size in that case so
+  # that the final reshape never receives None.
+  batch_size = tf.compat.dimension_value(batch_size)
+  if batch_size is None:
+    batch_size = tf.shape(features)[0]
+
+  flat_features = tf.reshape(features, [-1, num_features])
+
+  projected_features = layer(flat_features, is_training)
+
+  projected_features = tf.reshape(projected_features,
+                                  [batch_size, -1, bottleneck_dimension])
+
+  if normalize:
+    projected_features = tf.keras.backend.l2_normalize(projected_features,
+                                                       axis=-1)
+
+  return projected_features
+
+
+def compute_valid_mask(num_valid_elements, num_elements):
+  """Computes mask of valid entries within padded context feature.
+
+  Entry [b, i] of the result is True exactly when i < num_valid_elements[b].
+
+  Args:
+    num_valid_elements: A int32 Tensor of shape [batch_size].
+    num_elements: An int or scalar int32 Tensor giving the padded number of
+      elements.
+
+  Returns:
+    A boolean Tensor of the shape [batch_size, num_elements]. True means
+      valid and False means invalid.
+  """
+  # Shape [1, num_elements]; compared against [batch_size, 1] below so that
+  # broadcasting yields [batch_size, num_elements] without requiring a
+  # statically known batch size.
+  element_idxs = tf.range(num_elements, dtype=tf.int32)[tf.newaxis, ...]
+  num_valid_elements = num_valid_elements[..., tf.newaxis]
+  return tf.less(element_idxs, num_valid_elements)
--- a/research/object_detection/meta_architectures/context_rcnn_lib_tf2_test.py
+++ b/research/object_detection/meta_architectures/context_rcnn_lib_tf2_test.py
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for context_rcnn_lib."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+from absl.testing import parameterized
+import tensorflow.compat.v1 as tf
+
+from object_detection.meta_architectures import context_rcnn_lib_tf2 as context_rcnn_lib
+from object_detection.utils import test_case
+from object_detection.utils import tf_version
+
+_NEGATIVE_PADDING_VALUE = -100000
+
+
+@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
+class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase):
+  """Tests for the functions and layers in context_rcnn_lib_tf2."""
+
+  def test_compute_valid_mask(self):
+    """Checks the mask for a batch with 1 and 2 valid elements out of 3."""
+    num_elements = tf.constant(3, tf.int32)
+    num_valid_elements = tf.constant((1, 2), tf.int32)
+    valid_mask = context_rcnn_lib.compute_valid_mask(num_valid_elements,
+                                                     num_elements)
+    # The mask is boolean, so compare against a boolean expectation.
+    expected_valid_mask = tf.constant(
+        [[True, False, False], [True, True, False]], tf.bool)
+    self.assertAllEqual(valid_mask, expected_valid_mask)
+
+  def test_filter_weight_value(self):
+    """Checks padding of invalid weights and zeroing of invalid values."""
+    weights = tf.ones((2, 3, 2), tf.float32) * 4
+    values = tf.ones((2, 2, 4), tf.float32)
+    valid_mask = tf.constant([[True, True], [True, False]], tf.bool)
+
+    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
+        weights, values, valid_mask)
+    expected_weights = tf.constant(
+        [[[4, 4], [4, 4], [4, 4]],
+         [[4, _NEGATIVE_PADDING_VALUE + 4],
+          [4, _NEGATIVE_PADDING_VALUE + 4],
+          [4, _NEGATIVE_PADDING_VALUE + 4]]], tf.float32)
+
+    expected_values = tf.constant(
+        [[[1, 1, 1, 1], [1, 1, 1, 1]],
+         [[1, 1, 1, 1], [0, 0, 0, 0]]], tf.float32)
+    self.assertAllEqual(filtered_weights, expected_weights)
+    self.assertAllEqual(filtered_values, expected_values)
+
+    # Changes the valid_mask so the results will be different.
+    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
+
+    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
+        weights, values, valid_mask)
+    expected_weights = tf.constant(
+        [[[4, 4], [4, 4], [4, 4]],
+         [[_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
+          [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
+          [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4]]],
+        tf.float32)
+
+    expected_values = tf.constant(
+        [[[1, 1, 1, 1], [1, 1, 1, 1]],
+         [[0, 0, 0, 0], [0, 0, 0, 0]]], tf.float32)
+    self.assertAllEqual(filtered_weights, expected_weights)
+    self.assertAllEqual(filtered_values, expected_values)
+
+  @parameterized.parameters((2, True, True), (2, False, True),
+                            (10, True, False), (10, False, False))
+  def test_project_features(self, projection_dimension, is_training, normalize):
+    """Checks the output shape of project_features."""
+    features = tf.ones([2, 3, 4], tf.float32)
+    projected_features = context_rcnn_lib.project_features(
+        features,
+        projection_dimension,
+        is_training,
+        context_rcnn_lib.ContextProjection(projection_dimension),
+        normalize=normalize)
+
+    # Makes sure the shape is correct.
+    self.assertAllEqual(projected_features.shape, [2, 3, projection_dimension])
+
+  @parameterized.parameters(
+      (2, 10, 1),
+      (3, 10, 2),
+      (4, None, 3),
+      (5, 20, 4),
+      (7, None, 5),
+  )
+  def test_attention_block(self, bottleneck_dimension, output_dimension,
+                           attention_temperature):
+    """Checks the output shape of AttentionBlock."""
+    input_features = tf.ones([2, 8, 3, 3, 3], tf.float32)
+    context_features = tf.ones([2, 20, 10], tf.float32)
+    attention_block = context_rcnn_lib.AttentionBlock(
+        bottleneck_dimension,
+        attention_temperature,
+        output_dimension=output_dimension,
+        is_training=False)
+    valid_context_size = tf.random_uniform((2,),
+                                           minval=0,
+                                           maxval=10,
+                                           dtype=tf.int32)
+    output_features = attention_block(input_features, context_features,
+                                      valid_context_size)
+
+    # Makes sure the shape is correct. Without an explicit output dimension
+    # the block keeps the channel count of the input features (3 here).
+    self.assertAllEqual(output_features.shape,
+                        [2, 8, 1, 1, (output_dimension or 3)])
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/research/object_detection/meta_architectures/context_rcnn_meta_arch.py
+++ b/research/object_detection/meta_architectures/context_rcnn_meta_arch.py
@@ -27,7 +27,9 @@ import functools

 from object_detection.core import standard_fields as fields
 from object_detection.meta_architectures import context_rcnn_lib
+from object_detection.meta_architectures import context_rcnn_lib_tf2
 from object_detection.meta_architectures import faster_rcnn_meta_arch
+from object_detection.utils import tf_version


 class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
@@ -264,11 +266,17 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
            return_raw_detections_during_predict),
        output_final_box_features=output_final_box_features)

-    self._context_feature_extract_fn = functools.partial(
-        context_rcnn_lib.compute_box_context_attention,
-        bottleneck_dimension=attention_bottleneck_dimension,
-        attention_temperature=attention_temperature,
-        is_training=is_training)
+    if tf_version.is_tf1():
+      self._context_feature_extract_fn = functools.partial(
+          context_rcnn_lib.compute_box_context_attention,
+          bottleneck_dimension=attention_bottleneck_dimension,
+          attention_temperature=attention_temperature,
+          is_training=is_training)
+    else:
+      self._context_feature_extract_fn = context_rcnn_lib_tf2.AttentionBlock(
+          bottleneck_dimension=attention_bottleneck_dimension,
+          attention_temperature=attention_temperature,
+          is_training=is_training)

  @staticmethod
  def get_side_inputs(features):
@@ -323,6 +331,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
    Returns:
      A float32 Tensor with shape [K, new_height, new_width, depth].
    """
+
    box_features = self._crop_and_resize_fn(
        [features_to_crop], proposal_boxes_normalized, None,
        [self._initial_crop_size, self._initial_crop_size])

--- a/research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py
+++ b/research/object_detection/meta_architectures/context_rcnn_meta_arch_tf1_test.py
@@ -109,7 +109,6 @@ class FakeFasterRCNNKerasFeatureExtractor(
    ])


-@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
 class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):

  def _get_model(self, box_predictor, **common_kwargs):
@@ -440,15 +439,16 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
            masks_are_class_agnostic=masks_are_class_agnostic,
            share_box_across_classes=share_box_across_classes), **common_kwargs)

+  @unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
  @mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib')
-  def test_prediction_mock(self, mock_context_rcnn_lib):
-    """Mocks the context_rcnn_lib module to test the prediction.
+  def test_prediction_mock_tf1(self, mock_context_rcnn_lib_v1):
+    """Mocks the context_rcnn_lib_v1 module to test the prediction.

    Using mock object so that we can ensure compute_box_context_attention is
    called in side the prediction function.

    Args:
-      mock_context_rcnn_lib: mock module for the context_rcnn_lib.
+      mock_context_rcnn_lib_v1: mock module for the context_rcnn_lib_v1.
    """
    model = self._build_model(
        is_training=False,
@@ -457,7 +457,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
        num_classes=42)
    mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32)

-    mock_context_rcnn_lib.compute_box_context_attention.return_value = mock_tensor
+    mock_context_rcnn_lib_v1.compute_box_context_attention.return_value = mock_tensor
    inputs_shape = (2, 20, 20, 3)
    inputs = tf.cast(
        tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
@@ -479,7 +479,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
    side_inputs = model.get_side_inputs(features)

    _ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs)
-    mock_context_rcnn_lib.compute_box_context_attention.assert_called_once()
+    mock_context_rcnn_lib_v1.compute_box_context_attention.assert_called_once()

  @parameterized.named_parameters(
      {'testcase_name': 'static_shapes', 'static_shapes': True},
@@ -518,7 +518,6 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
      }

      side_inputs = model.get_side_inputs(features)
-
      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes,
                                      **side_inputs)
      return (prediction_dict['rpn_box_predictor_features'],

--- a/research/object_detection/model_lib_v2.py
+++ b/research/object_detection/model_lib_v2.py
@@ -117,7 +117,8 @@ def _compute_losses_and_predictions_dicts(

  prediction_dict = model.predict(
      preprocessed_images,
-      features[fields.InputDataFields.true_image_shape])
+      features[fields.InputDataFields.true_image_shape],
+      **model.get_side_inputs(features))
  prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)

  losses_dict = model.loss(