ModelZoo / ResNet50_tensorflow

Commit cbd0576f, authored Jun 26, 2020 by Kaushik Shivakumar

    first draft of making the context rcnn module tf2 compatible

Parent: 7ebcbe20
Changes: showing 4 changed files with 711 additions and 21 deletions (+711, -21).
research/object_detection/meta_architectures/context_rcnn_lib.py               +33  -19
research/object_detection/meta_architectures/context_rcnn_lib_tf2_test.py      +126 -0
research/object_detection/meta_architectures/context_rcnn_meta_arch.py         +12  -2
research/object_detection/meta_architectures/context_rcnn_meta_arch_tf2_test.py +540 -0
research/object_detection/meta_architectures/context_rcnn_lib.py (view file @ cbd0576f)
@@ -21,11 +21,14 @@ from __future__ import print_function
 import tensorflow.compat.v1 as tf
 import tf_slim as slim
 
+
+class BatchNormAndProj():
+
+  def __init__(self):
+    self.batch_norm = None
+    self.projection = None
+
 # The negative value used in padding the invalid weights.
 _NEGATIVE_PADDING_VALUE = -100000
 
 def filter_weight_value(weights, values, valid_mask):
   """Filters weights and values based on valid_mask.
@@ -96,8 +99,7 @@ def compute_valid_mask(num_valid_elements, num_elements):
   valid_mask = tf.less(batch_element_idxs, num_valid_elements)
   return valid_mask
 
 
-def project_features(features, projection_dimension, is_training, normalize):
+def project_features(features, projection_dimension, is_training, node=None,
+                     normalize=True):
   """Projects features to another feature space.
 
   Args:
@@ -111,6 +113,17 @@ def project_features(features, projection_dimension, is_training, normalize):
   Returns:
     A float Tensor of shape [batch, features_size, projection_dimension].
   """
+  if node is None:
+    node = {}
+  if 'batch_norm' not in node:
+    node['batch_norm'] = tf.keras.layers.BatchNormalization(
+        epsilon=0.001, center=True, scale=True, momentum=0.97)
+  if 'projection' not in node:
+    print("Creating new projection")
+    node['projection'] = tf.keras.layers.Dense(
+        units=projection_dimension, activation=tf.nn.relu6, use_bias=True)
+
   # TODO(guanhangwu) Figure out a better way of specifying the batch norm
   # params.
   batch_norm_params = {
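The new `node` argument is the heart of the TF2 change in this function: instead of relying on `variable_scope` reuse, the caller passes a mutable dict; the first call creates the Keras `BatchNormalization` and `Dense` layers and stores them in the dict, and every later call reuses those same layers and therefore the same variables. A minimal sketch of that caching pattern, assuming eager TF2 execution (the function and variable names below are illustrative, not from the commit):

# Sketch of the layer-caching pattern behind the `node` dict (illustrative).
import tensorflow as tf

def project(features, projection_dimension, is_training, node=None):
  """Creates the Keras layers on first use and caches them in `node`."""
  if node is None:
    node = {}
  if 'batch_norm' not in node:
    node['batch_norm'] = tf.keras.layers.BatchNormalization(
        epsilon=0.001, center=True, scale=True, momentum=0.97)
  if 'projection' not in node:
    node['projection'] = tf.keras.layers.Dense(
        units=projection_dimension, activation=tf.nn.relu6, use_bias=True)
  normalized = node['batch_norm'](features, training=is_training)
  return node['projection'](normalized)

node = {}
x = tf.ones([2, 3, 4])
y1 = project(x, 8, is_training=False, node=node)
layer = node['projection']
y2 = project(x, 8, is_training=False, node=node)
assert node['projection'] is layer  # the second call reuses the cached layer
print(y1.shape, y2.shape)           # (2, 3, 8) (2, 3, 8)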
@@ -120,16 +133,16 @@ def project_features(features, projection_dimension, is_training, normalize):
       "center": True,
       "scale": True
   }
 
-  batch_size, _, num_features = features.shape
+  shape_arr = features.shape
+  batch_size = shape_arr[0]
+  feature_size = shape_arr[1]
+  num_features = shape_arr[2]
   features = tf.reshape(features, [-1, num_features])
 
-  projected_features = slim.fully_connected(
-      features,
-      num_outputs=projection_dimension,
-      activation_fn=tf.nn.relu6,
-      normalizer_fn=slim.batch_norm,
-      normalizer_params=batch_norm_params)
+  batch_norm_features = node['batch_norm'](features)
+  projected_features = node['projection'](batch_norm_features,
+                                          training=is_training)
+  print(projected_features.shape)
   projected_features = tf.reshape(projected_features,
                                   [batch_size, -1, projection_dimension])
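Note that the replacement is not a drop-in equivalent of the removed slim call: `slim.fully_connected` with `normalizer_fn=slim.batch_norm` projects first, batch-normalizes the pre-activation output (no separate bias is added when a normalizer is used), and only then applies `relu6`, whereas the lines above normalize the inputs before the `Dense` layer and, at least in the lines shown, no longer consult the `normalize` flag or `batch_norm_params`. A rough Keras rendering of the old slim behaviour would look like this (a sketch with Keras-style parameter names, not code from the commit):

# Rough Keras equivalent of the removed slim.fully_connected call (a sketch).
import tensorflow as tf

def slim_style_projection(features, projection_dimension, is_training):
  dense = tf.keras.layers.Dense(units=projection_dimension, use_bias=False)
  batch_norm = tf.keras.layers.BatchNormalization(
      epsilon=0.001, center=True, scale=True, momentum=0.97)
  # Linear projection -> batch norm on the pre-activation -> relu6.
  pre_activation = batch_norm(dense(features), training=is_training)
  return tf.nn.relu6(pre_activation)

out = slim_style_projection(tf.ones([6, 4]), 8, is_training=True)
print(out.shape)  # (6, 8)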
@@ -141,7 +154,7 @@ def project_features(features, projection_dimension, is_training, normalize):
 def attention_block(input_features, context_features, bottleneck_dimension,
                     output_dimension, attention_temperature, valid_mask,
-                    is_training):
+                    is_training, attention_projections):
   """Generic attention block.
 
   Args:
@@ -165,12 +178,13 @@ def attention_block(input_features, context_features, bottleneck_dimension,
   with tf.variable_scope("AttentionBlock"):
     queries = project_features(
-        input_features, bottleneck_dimension, is_training, normalize=True)
+        input_features, bottleneck_dimension, is_training,
+        node=attention_projections["query"], normalize=True)
     keys = project_features(
-        context_features, bottleneck_dimension, is_training, normalize=True)
+        context_features, bottleneck_dimension, is_training,
+        node=attention_projections["key"], normalize=True)
     values = project_features(
-        context_features, bottleneck_dimension, is_training, normalize=True)
+        context_features, bottleneck_dimension, is_training,
+        node=attention_projections["val"], normalize=True)
+    print(attention_projections['query'])
 
     weights = tf.matmul(queries, keys, transpose_b=True)
     weights, values = filter_weight_value(weights, values, valid_mask)
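For readers following the attention math: `filter_weight_value` adds the large negative `_NEGATIVE_PADDING_VALUE` to the logits of invalid context entries and zeroes their values (see the new tests below), so that the softmax over the context axis, presumably applied with the attention temperature in the lines elided here, gives those entries essentially zero weight. A minimal sketch of that masked, temperature-scaled attention (illustrative shapes and names, not code from the commit):

# Sketch of masked dot-product attention with a temperature (illustrative).
import tensorflow as tf

_NEGATIVE_PADDING_VALUE = -100000

queries = tf.random.normal([2, 3, 8])   # [batch, num_boxes, bottleneck_dim]
keys = tf.random.normal([2, 5, 8])      # [batch, context_size, bottleneck_dim]
values = tf.random.normal([2, 5, 8])
valid_mask = tf.constant([[True, True, True, False, False],
                          [True, True, False, False, False]])

logits = tf.matmul(queries, keys, transpose_b=True)             # [2, 3, 5]
invalid = 1.0 - tf.cast(valid_mask, tf.float32)                 # 1 where padded
logits += invalid[:, tf.newaxis, :] * _NEGATIVE_PADDING_VALUE   # mask logits
weights = tf.nn.softmax(logits / 2.0, axis=-1)  # 2.0 = attention temperature
attended = tf.matmul(weights, values)                           # [2, 3, 8]
print(attended.shape)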
@@ -179,13 +193,13 @@ def attention_block(input_features, context_features, bottleneck_dimension,
     features = tf.matmul(weights, values)
     output_features = project_features(
-        features, output_dimension, is_training, normalize=False)
+        features, output_dimension, is_training,
+        node=attention_projections["feature"], normalize=False)
   return output_features
 
 
 def compute_box_context_attention(box_features, context_features,
                                   valid_context_size, bottleneck_dimension,
-                                  attention_temperature, is_training):
+                                  attention_temperature, is_training,
+                                  attention_projections):
   """Computes the attention feature from the context given a batch of box.
 
   Args:
@@ -214,9 +228,9 @@ def compute_box_context_attention(box_features, context_features,
   box_features = tf.reduce_mean(box_features, [2, 3])
 
   output_features = attention_block(
-      box_features, context_features, bottleneck_dimension, channels.value,
-      attention_temperature, valid_mask, is_training)
+      box_features, context_features, bottleneck_dimension, channels,
+      attention_temperature, valid_mask, is_training, attention_projections)
 
   # Expands the dimension back to match with the original feature map.
   output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]
research/object_detection/meta_architectures/context_rcnn_lib_tf2_test.py (new file, mode 0 → 100644, view file @ cbd0576f)
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for context_rcnn_lib."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf

from object_detection.meta_architectures import context_rcnn_lib
from object_detection.utils import test_case
from object_detection.utils import tf_version

_NEGATIVE_PADDING_VALUE = -100000


class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
                         tf.test.TestCase):
  """Tests for the functions in context_rcnn_lib."""

  def test_compute_valid_mask(self):
    num_elements = tf.constant(3, tf.int32)
    num_valid_elementss = tf.constant((1, 2), tf.int32)
    valid_mask = context_rcnn_lib.compute_valid_mask(num_valid_elementss,
                                                     num_elements)
    expected_valid_mask = tf.constant([[1, 0, 0], [1, 1, 0]], tf.float32)
    self.assertAllEqual(valid_mask, expected_valid_mask)

  def test_filter_weight_value(self):
    weights = tf.ones((2, 3, 2), tf.float32) * 4
    values = tf.ones((2, 2, 4), tf.float32)
    valid_mask = tf.constant([[True, True], [True, False]], tf.bool)

    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
        weights, values, valid_mask)
    expected_weights = tf.constant([[[4, 4], [4, 4], [4, 4]],
                                    [[4, _NEGATIVE_PADDING_VALUE + 4],
                                     [4, _NEGATIVE_PADDING_VALUE + 4],
                                     [4, _NEGATIVE_PADDING_VALUE + 4]]])
    expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                                   [[1, 1, 1, 1], [0, 0, 0, 0]]])
    self.assertAllEqual(filtered_weights, expected_weights)
    self.assertAllEqual(filtered_values, expected_values)

    # Changes the valid_mask so the results will be different.
    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
        weights, values, valid_mask)
    expected_weights = tf.constant(
        [[[4, 4], [4, 4], [4, 4]],
         [[_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
          [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
          [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4]]])
    expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                                   [[0, 0, 0, 0], [0, 0, 0, 0]]])
    self.assertAllEqual(filtered_weights, expected_weights)
    self.assertAllEqual(filtered_values, expected_values)

  @parameterized.parameters((2, True, True), (2, False, True),
                            (10, True, False), (10, False, False))
  def test_project_features(self, projection_dimension, is_training, normalize):
    features = tf.ones([2, 3, 4], tf.float32)
    projected_features = context_rcnn_lib.project_features(
        features, projection_dimension, is_training=is_training,
        normalize=normalize)
    # Makes sure the shape is correct.
    self.assertAllEqual(projected_features.shape,
                        [2, 3, projection_dimension])

  @parameterized.parameters(
      (2, 10, 1),
      (3, 10, 2),
      (4, 20, 3),
      (5, 20, 4),
      (7, 20, 5),
  )
  def test_attention_block(self, bottleneck_dimension, output_dimension,
                           attention_temperature):
    input_features = tf.ones([2, 3, 4], tf.float32)
    context_features = tf.ones([2, 2, 3], tf.float32)
    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
    is_training = False
    projection_layers = {"key": {}, "val": {}, "query": {}, "feature": {}}
    output_features = context_rcnn_lib.attention_block(
        input_features, context_features, bottleneck_dimension,
        output_dimension, attention_temperature, valid_mask, is_training,
        projection_layers)
    # Makes sure the shape is correct.
    self.assertAllEqual(output_features.shape, [2, 3, output_dimension])

  @parameterized.parameters(True, False)
  def test_compute_box_context_attention(self, is_training):
    box_features = tf.ones([2, 3, 4, 4, 4], tf.float32)
    context_features = tf.ones([2, 5, 6], tf.float32)
    valid_context_size = tf.constant((2, 3), tf.int32)
    bottleneck_dimension = 10
    attention_temperature = 1
    projection_layers = {"key": {}, "val": {}, "query": {}, "feature": {}}
    attention_features = context_rcnn_lib.compute_box_context_attention(
        box_features, context_features, valid_context_size,
        bottleneck_dimension, attention_temperature, is_training,
        projection_layers)
    # Makes sure the shape is correct.
    self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4])


if __name__ == '__main__':
  tf.test.main()
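One small observation about the new test file: it imports `unittest` and `tf_version` but never uses them. Other `*_tf2_test.py` files in the Object Detection API use exactly those imports to skip the suite when running under TF1; a sketch of that pattern (a suggestion, not part of the commit):

# Sketch of the usual TF2-only gating for *_tf2_test.py files (suggestion).
import unittest

import tensorflow.compat.v1 as tf
from absl.testing import parameterized
from object_detection.utils import test_case
from object_detection.utils import tf_version


@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
                         tf.test.TestCase):
  """TF2-only tests for context_rcnn_lib would go here."""


if __name__ == '__main__':
  tf.test.main()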
research/object_detection/meta_architectures/context_rcnn_meta_arch.py (view file @ cbd0576f)
@@ -75,7 +75,8 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
                return_raw_detections_during_predict=False,
                output_final_box_features=False,
                attention_bottleneck_dimension=None,
-               attention_temperature=None):
+               attention_temperature=None,
+               attention_projections=None):
     """ContextRCNNMetaArch Constructor.
 
     Args:
@@ -213,6 +214,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
       attention_bottleneck_dimension: A single integer. The bottleneck feature
         dimension of the attention block.
       attention_temperature: A single float. The attention temperature.
+      attention_projection_layers:
 
     Raises:
       ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
@@ -269,6 +271,11 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
         bottleneck_dimension=attention_bottleneck_dimension,
         attention_temperature=attention_temperature,
         is_training=is_training)
 
+    self._attention_projections = {"key": {}, "val": {}, "query": {},
+                                   "feature": {}}
+
   @staticmethod
   def get_side_inputs(features):
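Two things worth noting about this hunk: the per-instance `_attention_projections` dict is what lets the Keras layers created inside `project_features` persist across `predict` calls, and the `attention_projections=None` argument added to the constructor signature above is not, at least in the lines shown, wired into it yet. A minimal sketch of the instance-level caching pattern (a hypothetical class, not the meta-arch itself):

# Sketch of instance-level caching of attention projection layers (illustrative).
import tensorflow as tf


class AttentionHolder(object):
  """Holds one projection cache per attention role, reused on every call."""

  def __init__(self):
    self._attention_projections = {"key": {}, "val": {}, "query": {},
                                   "feature": {}}

  def __call__(self, features):
    node = self._attention_projections["query"]
    if "projection" not in node:
      node["projection"] = tf.keras.layers.Dense(units=8)
    return node["projection"](features)


holder = AttentionHolder()
out1 = holder(tf.ones([2, 4]))
first_layer = holder._attention_projections["query"]["projection"]
out2 = holder(tf.ones([2, 4]))
assert holder._attention_projections["query"]["projection"] is first_layer
print(out1.shape, out2.shape)  # (2, 8) (2, 8)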
@@ -327,10 +334,13 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
         features_to_crop, proposal_boxes_normalized,
         [self._initial_crop_size, self._initial_crop_size])
 
+    print(self._attention_projections)
     attention_features = self._context_feature_extract_fn(
         box_features=box_features,
         context_features=context_features,
-        valid_context_size=valid_context_size)
+        valid_context_size=valid_context_size,
+        attention_projections=self._attention_projections)
+    print(self._attention_projections)
 
     # Adds box features with attention features.
     box_features += attention_features
research/object_detection/meta_architectures/context_rcnn_meta_arch_tf2_test.py (new file, mode 0 → 100644, view file @ cbd0576f)

This diff is collapsed (540 added lines, not shown here).