Commit c81c01b2 authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

add back support for tf1

parent 40ea6deb
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for context_rcnn_lib."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import context_rcnn_lib_v1 as context_rcnn_lib
from object_detection.utils import test_case
from object_detection.utils import tf_version
_NEGATIVE_PADDING_VALUE = -100000
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
                         tf.test.TestCase):
  """Tests for the functions in context_rcnn_lib."""

  def test_compute_valid_mask(self):
    # Batch of two examples with 1 and 2 valid entries out of 3 padded slots.
    total_elements = tf.constant(3, tf.int32)
    valid_counts = tf.constant((1, 2), tf.int32)
    valid_mask = context_rcnn_lib.compute_valid_mask(valid_counts,
                                                     total_elements)
    self.assertAllEqual(valid_mask,
                        tf.constant([[1, 0, 0], [1, 1, 0]], tf.float32))

  def test_filter_weight_value(self):
    weights = 4 * tf.ones((2, 3, 2), tf.float32)
    values = tf.ones((2, 2, 4), tf.float32)
    padded = _NEGATIVE_PADDING_VALUE + 4
    # Each case: (valid_mask, expected weights, expected values). Invalid
    # entries get the large negative offset in weights and zeros in values.
    cases = [
        (tf.constant([[True, True], [True, False]], tf.bool),
         tf.constant([[[4, 4]] * 3, [[4, padded]] * 3]),
         tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                      [[1, 1, 1, 1], [0, 0, 0, 0]]])),
        (tf.constant([[True, True], [False, False]], tf.bool),
         tf.constant([[[4, 4]] * 3, [[padded, padded]] * 3]),
         tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                      [[0, 0, 0, 0], [0, 0, 0, 0]]])),
    ]
    for valid_mask, expected_weights, expected_values in cases:
      filtered_weights, filtered_values = (
          context_rcnn_lib.filter_weight_value(weights, values, valid_mask))
      self.assertAllEqual(filtered_weights, expected_weights)
      self.assertAllEqual(filtered_values, expected_values)

  @parameterized.parameters((2, True, True), (2, False, True),
                            (10, True, False), (10, False, False))
  def test_project_features(self, projection_dimension, is_training, normalize):
    features = tf.ones([2, 3, 4], tf.float32)
    projected = context_rcnn_lib.project_features(
        features,
        projection_dimension,
        is_training=is_training,
        normalize=normalize)
    # Only the output shape is checked; the projection weights are random.
    self.assertAllEqual(projected.shape, [2, 3, projection_dimension])

  @parameterized.parameters(
      (2, 10, 1),
      (3, 10, 2),
      (4, 20, 3),
      (5, 20, 4),
      (7, 20, 5),
  )
  def test_attention_block(self, bottleneck_dimension, output_dimension,
                           attention_temperature):
    input_features = tf.ones([2, 3, 4], tf.float32)
    context_features = tf.ones([2, 2, 3], tf.float32)
    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
    output_features = context_rcnn_lib.attention_block(
        input_features, context_features, bottleneck_dimension,
        output_dimension, attention_temperature, valid_mask,
        is_training=False)
    # Only the output shape is checked; the attention weights are random.
    self.assertAllEqual(output_features.shape, [2, 3, output_dimension])

  @parameterized.parameters(True, False)
  def test_compute_box_context_attention(self, is_training):
    box_features = tf.ones([2, 3, 4, 4, 4], tf.float32)
    context_features = tf.ones([2, 5, 6], tf.float32)
    valid_context_size = tf.constant((2, 3), tf.int32)
    attention_features = context_rcnn_lib.compute_box_context_attention(
        box_features, context_features, valid_context_size,
        bottleneck_dimension=10, attention_temperature=1,
        is_training=is_training)
    # Height/width are pooled away and re-expanded, giving
    # [batch, max_num_proposals, 1, 1, channels].
    self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4])
# Runs the TF test runner, which executes the test class above.
if __name__ == '__main__':
  tf.test.main()
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for context_rcnn_lib."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import context_rcnn_lib_v2 as context_rcnn_lib
from object_detection.utils import test_case
from object_detection.utils import tf_version
_NEGATIVE_PADDING_VALUE = -100000
# The v2 library is Keras-based; mirror the TF1 twin of this test, which
# carries the corresponding skip decorator (this also uses the otherwise
# unused `unittest` and `tf_version` imports).
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
                         tf.test.TestCase):
  """Tests for the functions in context_rcnn_lib."""

  def test_compute_valid_mask(self):
    # Batch of two examples with 1 and 2 valid entries out of 3 padded slots.
    num_elements = tf.constant(3, tf.int32)
    num_valid_elementss = tf.constant((1, 2), tf.int32)
    valid_mask = context_rcnn_lib.compute_valid_mask(num_valid_elementss,
                                                     num_elements)
    expected_valid_mask = tf.constant([[1, 0, 0], [1, 1, 0]], tf.float32)
    self.assertAllEqual(valid_mask, expected_valid_mask)

  def test_filter_weight_value(self):
    weights = tf.ones((2, 3, 2), tf.float32) * 4
    values = tf.ones((2, 2, 4), tf.float32)
    valid_mask = tf.constant([[True, True], [True, False]], tf.bool)
    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
        weights, values, valid_mask)
    # Invalid entries get the large negative offset in the weights and are
    # zeroed out in the values.
    expected_weights = tf.constant([[[4, 4], [4, 4], [4, 4]],
                                    [[4, _NEGATIVE_PADDING_VALUE + 4],
                                     [4, _NEGATIVE_PADDING_VALUE + 4],
                                     [4, _NEGATIVE_PADDING_VALUE + 4]]])
    expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                                   [[1, 1, 1, 1], [0, 0, 0, 0]]])
    self.assertAllEqual(filtered_weights, expected_weights)
    self.assertAllEqual(filtered_values, expected_values)
    # Changes the valid_mask so the results will be different.
    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
        weights, values, valid_mask)
    expected_weights = tf.constant(
        [[[4, 4], [4, 4], [4, 4]],
         [[_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
          [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
          [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4]]])
    expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                                   [[0, 0, 0, 0], [0, 0, 0, 0]]])
    self.assertAllEqual(filtered_weights, expected_weights)
    self.assertAllEqual(filtered_values, expected_values)

  @parameterized.parameters((2, True, True), (2, False, True),
                            (10, True, False), (10, False, False))
  def test_project_features(self, projection_dimension, is_training, normalize):
    features = tf.ones([2, 3, 4], tf.float32)
    # v2 project_features takes an explicit projection layer.
    projected_features = context_rcnn_lib.project_features(
        features,
        projection_dimension,
        is_training,
        context_rcnn_lib.ContextProjection(projection_dimension, False),
        normalize=normalize)
    # Makes sure the shape is correct.
    self.assertAllEqual(projected_features.shape, [2, 3, projection_dimension])

  @parameterized.parameters(
      (2, 10, 1),
      (3, 10, 2),
      (4, 20, 3),
      (5, 20, 4),
      (7, 20, 5),
  )
  def test_attention_block(self, bottleneck_dimension, output_dimension,
                           attention_temperature):
    input_features = tf.ones([2, 3, 4], tf.float32)
    context_features = tf.ones([2, 2, 3], tf.float32)
    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
    is_training = False
    # The block builds its own key/value/query projections; the output
    # (feature) projection dimension must be set before the first call.
    attention_block = context_rcnn_lib.AttentionBlock(
        bottleneck_dimension, attention_temperature, False)
    attention_block.set_output_dimension(output_dimension)
    output_features = attention_block([input_features, context_features],
                                      is_training, valid_mask)
    # Makes sure the shape is correct.
    self.assertAllEqual(output_features.shape, [2, 3, output_dimension])

  @parameterized.parameters(True, False)
  def test_compute_box_context_attention(self, is_training):
    box_features = tf.ones([2, 3, 4, 4, 4], tf.float32)
    context_features = tf.ones([2, 5, 6], tf.float32)
    valid_context_size = tf.constant((2, 3), tf.int32)
    bottleneck_dimension = 10
    attention_temperature = 1
    attention_features = context_rcnn_lib.compute_box_context_attention(
        box_features, context_features, valid_context_size,
        is_training,
        context_rcnn_lib.AttentionBlock(bottleneck_dimension,
                                        attention_temperature, False))
    # Makes sure the shape is correct.
    self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4])
# Runs the TF test runner, which executes the test class above.
if __name__ == '__main__':
  tf.test.main()
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library functions for ContextRCNN."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
import tf_slim as slim
# The negative value used in padding the invalid weights.
_NEGATIVE_PADDING_VALUE = -100000
def filter_weight_value(weights, values, valid_mask):
  """Masks out invalid context entries in attention weights and values.

  Invalid weight entries are shifted by _NEGATIVE_PADDING_VALUE so they
  contribute (effectively) nothing to a subsequent softmax; invalid value
  entries are zeroed.

  Args:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].
    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True
      means valid and False means invalid.

  Returns:
    The filtered (weights, values) pair, same shapes as the inputs.

  Raises:
    ValueError: If the tensor shapes are inconsistent with each other.
  """
  w_batch, _, w_ctx = weights.shape
  v_batch, v_ctx, _ = values.shape
  m_batch, m_ctx = valid_mask.shape
  # Shape consistency checks across all three inputs.
  if w_batch != v_batch or v_batch != m_batch:
    raise ValueError("Please make sure the first dimension of the input"
                     " tensors are the same.")
  if w_ctx != v_ctx:
    raise ValueError("Please make sure the third dimension of weights matches"
                     " the second dimension of values.")
  if w_ctx != m_ctx:
    raise ValueError("Please make sure the third dimension of the weights"
                     " matches the second dimension of the valid_mask.")
  expanded_mask = valid_mask[..., tf.newaxis]
  # Push invalid weights far negative so softmax assigns them ~0 probability.
  invalid = tf.cast(tf.math.logical_not(expanded_mask), weights.dtype)
  weights += tf.transpose(invalid * _NEGATIVE_PADDING_VALUE, perm=[0, 2, 1])
  # Zero out the values of invalid context entries.
  values *= tf.cast(expanded_mask, values.dtype)
  return weights, values
def compute_valid_mask(num_valid_elements, num_elements):
  """Computes mask of valid entries within padded context feature.

  Args:
    num_valid_elements: A int32 Tensor of shape [batch_size].
    num_elements: An int32 Tensor giving the padded length.

  Returns:
    A boolean Tensor of the shape [batch_size, num_elements]. True means
    valid and False means invalid.
  """
  batch_size = num_valid_elements.shape[0]
  # Broadcast per-slot indices across the batch and compare each index
  # against that example's valid count.
  slot_indices = tf.tile(
      tf.range(num_elements, dtype=tf.int32)[tf.newaxis, ...], [batch_size, 1])
  return tf.less(slot_indices, num_valid_elements[..., tf.newaxis])
def project_features(features, projection_dimension, is_training, normalize):
  """Projects features to another feature space.

  Args:
    features: A float Tensor of shape [batch_size, features_size,
      num_features].
    projection_dimension: A int32 Tensor.
    is_training: A boolean Tensor (affecting batch normalization).
    normalize: A boolean Tensor. If true, the output features will be l2
      normalized on the last dimension.

  Returns:
    A float Tensor of shape [batch, features_size, projection_dimension].
  """
  # TODO(guanhangwu) Figure out a better way of specifying the batch norm
  # params.
  batch_norm_params = {
      "is_training": is_training,
      "decay": 0.97,
      "epsilon": 0.001,
      "center": True,
      "scale": True
  }
  batch_size, _, num_features = features.shape
  # Collapse the batch and features_size dimensions so one fully connected
  # layer projects every feature vector at once.
  features = tf.reshape(features, [-1, num_features])
  projected_features = slim.fully_connected(
      features,
      num_outputs=projection_dimension,
      activation_fn=tf.nn.relu6,
      normalizer_fn=slim.batch_norm,
      normalizer_params=batch_norm_params)
  # Restore the [batch_size, features_size, ...] layout.
  projected_features = tf.reshape(projected_features,
                                  [batch_size, -1, projection_dimension])
  if normalize:
    projected_features = tf.math.l2_normalize(projected_features, axis=-1)
  return projected_features
def attention_block(input_features, context_features, bottleneck_dimension,
                    output_dimension, attention_temperature, valid_mask,
                    is_training):
  """Generic attention block.

  Args:
    input_features: A float Tensor of shape [batch_size, input_size,
      num_input_features].
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
      for intermediate projections.
    output_dimension: A int32 Tensor representing the last dimension of the
      output feature.
    attention_temperature: A float Tensor. It controls the temperature of the
      softmax for weights calculation. The formula for calculation as follows:
        weights = exp(weights / temperature) / sum(exp(weights / temperature))
    valid_mask: A boolean Tensor of shape [batch_size, context_size].
    is_training: A boolean Tensor (affecting batch normalization).

  Returns:
    A float Tensor of shape [batch_size, input_size, output_dimension].
  """
  with tf.variable_scope("AttentionBlock"):
    # Project inputs and context into a shared, l2-normalized bottleneck
    # space before computing attention weights.
    queries = project_features(
        input_features, bottleneck_dimension, is_training, normalize=True)
    keys = project_features(
        context_features, bottleneck_dimension, is_training, normalize=True)
    values = project_features(
        context_features, bottleneck_dimension, is_training, normalize=True)
    # Dot-product attention; padded context entries are masked out of both
    # the weights and the values before the softmax.
    weights = tf.matmul(queries, keys, transpose_b=True)
    weights, values = filter_weight_value(weights, values, valid_mask)
    weights = tf.nn.softmax(weights / attention_temperature)
    features = tf.matmul(weights, values)
    # Final un-normalized projection into the requested output dimension.
    output_features = project_features(
        features, output_dimension, is_training, normalize=False)
  return output_features
def compute_box_context_attention(box_features, context_features,
                                  valid_context_size, bottleneck_dimension,
                                  attention_temperature, is_training):
  """Computes the attention feature from the context given a batch of box.

  Args:
    box_features: A float Tensor of shape [batch_size, max_num_proposals,
      height, width, channels]. It is pooled features from first stage
      proposals.
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    valid_context_size: A int32 Tensor of shape [batch_size].
    bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
      for intermediate projections.
    attention_temperature: A float Tensor. It controls the temperature of the
      softmax for weights calculation. The formula for calculation as follows:
        weights = exp(weights / temperature) / sum(exp(weights / temperature))
    is_training: A boolean Tensor (affecting batch normalization).

  Returns:
    A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
  """
  _, context_size, _ = context_features.shape
  valid_mask = compute_valid_mask(valid_context_size, context_size)
  channels = box_features.shape[-1]
  # Average pools over height and width dimension so that the shape of
  # box_features becomes [batch_size, max_num_proposals, channels].
  box_features = tf.reduce_mean(box_features, [2, 3])
  # `channels.value` converts the static tf.Dimension to a plain int for use
  # as the attention output dimension.
  output_features = attention_block(box_features, context_features,
                                    bottleneck_dimension, channels.value,
                                    attention_temperature, valid_mask,
                                    is_training)
  # Expands the dimension back to match with the original feature map.
  output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]
  return output_features
\ No newline at end of file
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library functions for ContextRCNN."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
# The negative value used in padding the invalid weights.
_NEGATIVE_PADDING_VALUE = -100000
# Names identifying the individual projection roles inside an attention
# block: key/value/query projections plus the final feature projection.
KEY_NAME = 'key'
VALUE_NAME = 'val'
QUERY_NAME = 'query'
FEATURE_NAME = 'feature'
class ContextProjection(tf.keras.layers.Layer):
  """Custom layer to do batch normalization and projection."""

  def __init__(self, projection_dimension, freeze_batchnorm, **kwargs):
    # Batch norm is applied before the dense projection; freezing makes the
    # batch norm layer (including its moving statistics) non-trainable.
    self.batch_norm = tf.keras.layers.BatchNormalization(
        epsilon=0.001,
        center=True,
        scale=True,
        momentum=0.97,
        trainable=(not freeze_batchnorm))
    self.projection = tf.keras.layers.Dense(units=projection_dimension,
                                            activation=tf.nn.relu6,
                                            use_bias=True)
    super(ContextProjection, self).__init__(**kwargs)

  def build(self, input_shape):
    # Both sublayers are built against the same input shape (batch norm
    # preserves shape, so the dense layer sees the same last dimension).
    self.batch_norm.build(input_shape)
    self.projection.build(input_shape)

  def call(self, input_features, is_training=False):
    # NOTE(review): is_training is forwarded positionally as the batch norm
    # layer's `training` argument — confirm against the Keras call contract.
    return self.projection(self.batch_norm(input_features, is_training))
class AttentionBlock(tf.keras.layers.Layer):
  """Custom layer to perform all attention."""

  def __init__(self, bottleneck_dimension, attention_temperature,
               freeze_batchnorm, output_dimension=None, **kwargs):
    # Separate key/value/query projections, all into the same bottleneck
    # dimension.
    self.key_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
    self.val_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
    self.query_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
    self.attention_temperature = attention_temperature
    self.freeze_batchnorm = freeze_batchnorm
    self.bottleneck_dimension = bottleneck_dimension
    # output_dimension may be deferred and supplied later through
    # set_output_dimension (e.g. once the box feature channel count is known).
    if output_dimension:
      self.output_dimension = output_dimension
    super(AttentionBlock, self).__init__(**kwargs)

  def set_output_dimension(self, new_output_dimension):
    # Must be called before the layer is built if output_dimension was not
    # given to the constructor; build() reads self.output_dimension.
    self.output_dimension = new_output_dimension

  def build(self, input_shapes):
    # The final (feature) projection is created lazily here so it can use an
    # output_dimension that was set after construction.
    self.feature_proj = ContextProjection(self.output_dimension,
                                          self.freeze_batchnorm)

  def call(self, input_features, is_training, valid_mask):
    """Handles a call by performing attention"""
    # input_features is a two-element sequence: the per-box input features
    # and the padded context features.
    input_features, context_features = input_features
    with tf.variable_scope("AttentionBlock"):
      queries = project_features(
          input_features, self.bottleneck_dimension, is_training,
          self.query_proj, normalize=True)
      keys = project_features(
          context_features, self.bottleneck_dimension, is_training,
          self.key_proj, normalize=True)
      values = project_features(
          context_features, self.bottleneck_dimension, is_training,
          self.val_proj, normalize=True)
      # Dot-product attention; padded context entries are masked out of both
      # the weights and the values before the softmax.
      weights = tf.matmul(queries, keys, transpose_b=True)
      weights, values = filter_weight_value(weights, values, valid_mask)
      weights = tf.nn.softmax(weights / self.attention_temperature)
      features = tf.matmul(weights, values)
      # Final un-normalized projection into the output dimension.
      output_features = project_features(
          features, self.output_dimension, is_training,
          self.feature_proj, normalize=False)
    return output_features
def filter_weight_value(weights, values, valid_mask):
  """Filters weights and values based on valid_mask.

  _NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to
  avoid their contribution in softmax. 0 will be set for the invalid elements
  in the values.

  Args:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].
    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True
      means valid and False means invalid.

  Returns:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].

  Raises:
    ValueError: If the tensor shapes are inconsistent with each other.
  """
  w_batch_size, _, w_context_size = weights.shape
  v_batch_size, v_context_size, _ = values.shape
  m_batch_size, m_context_size = valid_mask.shape
  # Shape consistency checks across all three inputs.
  if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
    raise ValueError("Please make sure the first dimension of the input"
                     " tensors are the same.")
  if w_context_size != v_context_size:
    raise ValueError("Please make sure the third dimension of weights matches"
                     " the second dimension of values.")
  if w_context_size != m_context_size:
    raise ValueError("Please make sure the third dimension of the weights"
                     " matches the second dimension of the valid_mask.")
  valid_mask = valid_mask[..., tf.newaxis]
  # Force the invalid weights to be very negative so it won't contribute to
  # the softmax.
  weights += tf.transpose(
      tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
      _NEGATIVE_PADDING_VALUE,
      perm=[0, 2, 1])
  # Force the invalid values to be 0.
  values *= tf.cast(valid_mask, values.dtype)
  return weights, values
def project_features(features, bottleneck_dimension, is_training,
                     layer, normalize=True):
  """Projects features to another feature space using the given layer.

  Args:
    features: A float Tensor of shape [batch_size, features_size,
      num_features].
    bottleneck_dimension: A int32 Tensor; the dimension the provided layer
      projects into (used to reshape the layer's output).
    is_training: A boolean Tensor (affecting batch normalization).
    layer: A ContextProjection layer specific to the particular operation
      being performed (key, value, query, feature).
    normalize: A boolean Tensor. If true, the output features will be l2
      normalized on the last dimension.

  Returns:
    A float Tensor of shape [batch, features_size, bottleneck_dimension].
  """
  shape_arr = features.shape
  batch_size, _, num_features = shape_arr
  # Collapse the batch and features_size dimensions so the projection layer
  # sees a single matrix of feature vectors.
  features = tf.reshape(features, [-1, num_features])
  projected_features = layer(features, is_training)
  # Restore the [batch_size, features_size, ...] layout.
  projected_features = tf.reshape(projected_features,
                                  [batch_size, -1, bottleneck_dimension])
  if normalize:
    projected_features = tf.keras.backend.l2_normalize(projected_features,
                                                       axis=-1)
  return projected_features
def compute_valid_mask(num_valid_elements, num_elements):
  """Computes mask of valid entries within padded context feature.

  Args:
    num_valid_elements: A int32 Tensor of shape [batch_size].
    num_elements: An int32 Tensor giving the padded length.

  Returns:
    A boolean Tensor of the shape [batch_size, num_elements]. True means
    valid and False means invalid.
  """
  batch_size = num_valid_elements.shape[0]
  # Per-slot indices, broadcast across the batch.
  element_idxs = tf.range(num_elements, dtype=tf.int32)
  batch_element_idxs = tf.tile(element_idxs[tf.newaxis, ...], [batch_size, 1])
  num_valid_elements = num_valid_elements[..., tf.newaxis]
  # A slot is valid iff its index is below that example's valid count.
  valid_mask = tf.less(batch_element_idxs, num_valid_elements)
  return valid_mask
def compute_box_context_attention(box_features, context_features,
                                  valid_context_size, is_training,
                                  attention_block):
  """Computes the attention feature from the context given a batch of box.

  Args:
    box_features: A float Tensor of shape [batch_size, max_num_proposals,
      height, width, channels]. It is pooled features from first stage
      proposals.
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    valid_context_size: A int32 Tensor of shape [batch_size].
    is_training: A boolean Tensor (affecting batch normalization).
    attention_block: An AttentionBlock layer that performs the attention
      computation; its output dimension is set here to match the box
      feature channels.

  Returns:
    A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
  """
  _, context_size, _ = context_features.shape
  valid_mask = compute_valid_mask(valid_context_size, context_size)
  channels = box_features.shape[-1]
  # The attention output must match the box feature channels so the caller
  # can add the two feature maps together.
  # NOTE(review): under tf.compat.v1, `channels` may be a tf.Dimension rather
  # than a plain int (the TF1 library uses `.value`) — confirm.
  attention_block.set_output_dimension(channels)
  # Average pools over height and width dimension so that the shape of
  # box_features becomes [batch_size, max_num_proposals, channels].
  box_features = tf.reduce_mean(box_features, [2, 3])
  output_features = attention_block([box_features, context_features],
                                    is_training, valid_mask)
  # Expands the dimension back to match with the original feature map.
  output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]
  return output_features
......@@ -26,8 +26,9 @@ from __future__ import print_function
import functools
from object_detection.core import standard_fields as fields
from object_detection.meta_architectures import context_rcnn_lib
from object_detection.meta_architectures import context_rcnn_lib_v1, context_rcnn_lib_v2
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.utils import tf_version
class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
......@@ -265,14 +266,18 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
return_raw_detections_during_predict),
output_final_box_features=output_final_box_features)
if tf_version.is_tf1():
self._context_feature_extract_fn = functools.partial(
context_rcnn_lib.compute_box_context_attention,
context_rcnn_lib_v1.compute_box_context_attention,
bottleneck_dimension=attention_bottleneck_dimension,
attention_temperature=attention_temperature,
is_training=is_training)
else:
self._context_feature_extract_fn = functools.partial(
context_rcnn_lib_v2.compute_box_context_attention,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm)
self._attention_block = context_rcnn_lib.AttentionBlock(attention_bottleneck_dimension, attention_temperature, freeze_batchnorm)
attention_block=context_rcnn_lib_v2.AttentionBlock(
attention_bottleneck_dimension, attention_temperature, freeze_batchnorm))
@staticmethod
def get_side_inputs(features):
......@@ -335,8 +340,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
attention_features = self._context_feature_extract_fn(
box_features=box_features,
context_features=context_features,
valid_context_size=valid_context_size,
attention_block=self._attention_block)
valid_context_size=valid_context_size)
# Adds box features with attention features.
box_features += attention_features
......
......@@ -438,15 +438,16 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
masks_are_class_agnostic=masks_are_class_agnostic,
share_box_across_classes=share_box_across_classes), **common_kwargs)
@mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib')
def test_prediction_mock(self, mock_context_rcnn_lib):
"""Mocks the context_rcnn_lib module to test the prediction.
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF2.X only test.')
@mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib_v1')
def test_prediction_mock_tf1(self, mock_context_rcnn_lib_v1):
"""Mocks the context_rcnn_lib_v1 module to test the prediction.
Using mock object so that we can ensure compute_box_context_attention is
called in side the prediction function.
Args:
mock_context_rcnn_lib: mock module for the context_rcnn_lib.
mock_context_rcnn_lib_v1: mock module for the context_rcnn_lib_v1.
"""
model = self._build_model(
is_training=False,
......@@ -455,7 +456,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
num_classes=42)
mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32)
mock_context_rcnn_lib.compute_box_context_attention.return_value = mock_tensor
mock_context_rcnn_lib_v1.compute_box_context_attention.return_value = mock_tensor
inputs_shape = (2, 20, 20, 3)
inputs = tf.cast(
tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
......@@ -477,7 +478,49 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
side_inputs = model.get_side_inputs(features)
_ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs)
mock_context_rcnn_lib.compute_box_context_attention.assert_called_once()
mock_context_rcnn_lib_v1.compute_box_context_attention.assert_called_once()
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF1.X only test.')
@mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib_v2')
def test_prediction_mock_tf2(self, mock_context_rcnn_lib_v2):
"""Mocks the context_rcnn_lib_v2 module to test the prediction.
Using mock object so that we can ensure compute_box_context_attention is
called in side the prediction function.
Args:
mock_context_rcnn_lib_v2: mock module for the context_rcnn_lib_v2.
"""
model = self._build_model(
is_training=False,
number_of_stages=2,
second_stage_batch_size=6,
num_classes=42)
mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32)
mock_context_rcnn_lib_v2.compute_box_context_attention.return_value = mock_tensor
inputs_shape = (2, 20, 20, 3)
inputs = tf.cast(
tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
context_features = tf.random_uniform((2, 20, 10),
minval=0,
maxval=255,
dtype=tf.float32)
valid_context_size = tf.random_uniform((2,),
minval=0,
maxval=10,
dtype=tf.int32)
features = {
fields.InputDataFields.context_features: context_features,
fields.InputDataFields.valid_context_size: valid_context_size
}
side_inputs = model.get_side_inputs(features)
_ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs)
mock_context_rcnn_lib_v2.compute_box_context_attention.assert_called_once()
@parameterized.named_parameters(
{'testcase_name': 'static_shapes', 'static_shapes': True},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment