make significant fixes suggested

cbd607ab · Kaushik Shivakumar · e7667f6f · e7667f6f · cbd607ab · cbd607ab
Commit cbd607ab authored Jul 16, 2020 by Kaushik Shivakumar
3 changed files
--- a/research/object_detection/meta_architectures/context_rcnn_lib_v1.py
+++ b/research/object_detection/meta_architectures/context_rcnn_lib_v1.py
-# Lint as: python3
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Library functions for ContextRCNN."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import tensorflow.compat.v1 as tf
-import tf_slim as slim
-# The negative value used in padding the invalid weights.
-_NEGATIVE_PADDING_VALUE = -100000
-def filter_weight_value(weights, values, valid_mask):
-  """Filters weights and values based on valid_mask.
-  _NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to
-  avoid their contribution in softmax. 0 will be set for the invalid elements in
-  the values.
-  Args:
-    weights: A float Tensor of shape [batch_size, input_size, context_size].
-    values: A float Tensor of shape [batch_size, context_size,
-      projected_dimension].
-    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
-      valid and False means invalid.
-  Returns:
-    weights: A float Tensor of shape [batch_size, input_size, context_size].
-    values: A float Tensor of shape [batch_size, context_size,
-      projected_dimension].
-  Raises:
-    ValueError: If shape of doesn't match.
-  """
-  w_batch_size, _, w_context_size = weights.shape
-  v_batch_size, v_context_size, _ = values.shape
-  m_batch_size, m_context_size = valid_mask.shape
-  if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
-    raise ValueError("Please make sure the first dimension of the input"
-                     " tensors are the same.")
-  if w_context_size != v_context_size:
-    raise ValueError("Please make sure the third dimension of weights matches"
-                     " the second dimension of values.")
-  if w_context_size != m_context_size:
-    raise ValueError("Please make sure the third dimension of the weights"
-                     " matches the second dimension of the valid_mask.")
-  valid_mask = valid_mask[..., tf.newaxis]
-  # Force the invalid weights to be very negative so it won't contribute to
-  # the softmax.
-  weights += tf.transpose(
-      tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
-      _NEGATIVE_PADDING_VALUE,
-      perm=[0, 2, 1])
-  # Force the invalid values to be 0.
-  values *= tf.cast(valid_mask, values.dtype)
-  return weights, values
-def compute_valid_mask(num_valid_elements, num_elements):
-  """Computes mask of valid entries within padded context feature.
-  Args:
-    num_valid_elements: A int32 Tensor of shape [batch_size].
-    num_elements: An int32 Tensor.
-  Returns:
-    A boolean Tensor of the shape [batch_size, num_elements]. True means
-      valid and False means invalid.
-  """
-  batch_size = num_valid_elements.shape[0]
-  element_idxs = tf.range(num_elements, dtype=tf.int32)
-  batch_element_idxs = tf.tile(element_idxs[tf.newaxis, ...], [batch_size, 1])
-  num_valid_elements = num_valid_elements[..., tf.newaxis]
-  valid_mask = tf.less(batch_element_idxs, num_valid_elements)
-  return valid_mask
-def project_features(features, projection_dimension, is_training, normalize):
-  """Projects features to another feature space.
-  Args:
-    features: A float Tensor of shape [batch_size, features_size,
-      num_features].
-    projection_dimension: A int32 Tensor.
-    is_training: A boolean Tensor (affecting batch normalization).
-    normalize: A boolean Tensor. If true, the output features will be l2
-      normalized on the last dimension.
-  Returns:
-    A float Tensor of shape [batch, features_size, projection_dimension].
-  """
-  # TODO(guanhangwu) Figure out a better way of specifying the batch norm
-  # params.
-  batch_norm_params = {
-      "is_training": is_training,
-      "decay": 0.97,
-      "epsilon": 0.001,
-      "center": True,
-      "scale": True
-  }
-  batch_size, _, num_features = features.shape
-  features = tf.reshape(features, [-1, num_features])
-  projected_features = slim.fully_connected(
-      features,
-      num_outputs=projection_dimension,
-      activation_fn=tf.nn.relu6,
-      normalizer_fn=slim.batch_norm,
-      normalizer_params=batch_norm_params)
-  projected_features = tf.reshape(projected_features,
-                                  [batch_size, -1, projection_dimension])
-  if normalize:
-    projected_features = tf.math.l2_normalize(projected_features, axis=-1)
-  return projected_features
-def attention_block(input_features, context_features, bottleneck_dimension,
-                    output_dimension, attention_temperature, valid_mask,
-                    is_training):
-  """Generic attention block.
-  Args:
-    input_features: A float Tensor of shape [batch_size, input_size,
-      num_input_features].
-    context_features: A float Tensor of shape [batch_size, context_size,
-      num_context_features].
-    bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
-      for intermediate projections.
-    output_dimension: A int32 Tensor representing the last dimension of the
-      output feature.
-    attention_temperature: A float Tensor. It controls the temperature of the
-      softmax for weights calculation. The formula for calculation as follows:
-        weights = exp(weights / temperature) / sum(exp(weights / temperature))
-    valid_mask: A boolean Tensor of shape [batch_size, context_size].
-    is_training: A boolean Tensor (affecting batch normalization).
-  Returns:
-    A float Tensor of shape [batch_size, input_size, output_dimension].
-  """
-  with tf.variable_scope("AttentionBlock"):
-    queries = project_features(
-        input_features, bottleneck_dimension, is_training, normalize=True)
-    keys = project_features(
-        context_features, bottleneck_dimension, is_training, normalize=True)
-    values = project_features(
-        context_features, bottleneck_dimension, is_training, normalize=True)
-  weights = tf.matmul(queries, keys, transpose_b=True)
-  weights, values = filter_weight_value(weights, values, valid_mask)
-  weights = tf.nn.softmax(weights / attention_temperature)
-  features = tf.matmul(weights, values)
-  output_features = project_features(
-      features, output_dimension, is_training, normalize=False)
-  return output_features
-def compute_box_context_attention(box_features, context_features,
-                                  valid_context_size, bottleneck_dimension,
-                                  attention_temperature, is_training):
-  """Computes the attention feature from the context given a batch of box.
-  Args:
-    box_features: A float Tensor of shape [batch_size, max_num_proposals,
-      height, width, channels]. It is pooled features from first stage
-      proposals.
-    context_features: A float Tensor of shape [batch_size, context_size,
-      num_context_features].
-    valid_context_size: A int32 Tensor of shape [batch_size].
-    bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
-      for intermediate projections.
-    attention_temperature: A float Tensor. It controls the temperature of the
-      softmax for weights calculation. The formula for calculation as follows:
-        weights = exp(weights / temperature) / sum(exp(weights / temperature))
-    is_training: A boolean Tensor (affecting batch normalization).
-  Returns:
-    A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
-  """
-  _, context_size, _ = context_features.shape
-  valid_mask = compute_valid_mask(valid_context_size, context_size)
-  channels = box_features.shape[-1]
-  # Average pools over height and width dimension so that the shape of
-  # box_features becomes [batch_size, max_num_proposals, channels].
-  box_features = tf.reduce_mean(box_features, [2, 3])
-  output_features = attention_block(box_features, context_features,
-                                    bottleneck_dimension, channels.value,
-                                    attention_temperature, valid_mask,
-                                    is_training)
-  # Expands the dimension back to match with the original feature map.
-  output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]
-  return output_features
\ No newline at end of file
--- a/research/object_detection/meta_architectures/context_rcnn_lib_v2.py
+++ b/research/object_detection/meta_architectures/context_rcnn_lib_v2.py
@@ -53,36 +53,35 @@ class AttentionBlock(tf.keras.layers.Layer):
  """Custom layer to perform all attention."""
  def __init__(self, bottleneck_dimension, attention_temperature,
-               freeze_batchnorm, output_dimension=None, **kwargs):
+               freeze_batchnorm, output_dimension=None, is_training=False,
+               **kwargs):
+    """Creates the projection sub-layers and stores the attention settings.
+
+    Args:
+      bottleneck_dimension: Output size given to the key, value and query
+        ContextProjection layers.
+      attention_temperature: Divisor applied to the attention weights before
+        the softmax in call().
+      freeze_batchnorm: Passed through unchanged to every ContextProjection.
+      output_dimension: Size of the final feature projection. When left unset,
+        call() falls back to the channel count of its input features.
+      is_training: Forwarded to project_features() in call(). Declared
+        explicitly because the body stores it and callers pass it by keyword;
+        without the parameter the assignment below raises NameError.
+      **kwargs: Remaining keyword arguments for the base Layer constructor.
+    """
-    self.key_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
+    self._key_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
-    self.val_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
+    self._val_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
-    self.query_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
+    self._query_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
-    self.feature_proj = None
+    # Built lazily in call(), once the output dimension is known.
+    self._feature_proj = None
-    self.attention_temperature = attention_temperature
+    self._attention_temperature = attention_temperature
-    self.freeze_batchnorm = freeze_batchnorm
+    self._freeze_batchnorm = freeze_batchnorm
-    self.bottleneck_dimension = bottleneck_dimension
+    self._bottleneck_dimension = bottleneck_dimension
-    self.output_dimension = output_dimension
+    self._output_dimension = output_dimension
+    self._is_training = is_training
    super(AttentionBlock, self).__init__(**kwargs)
  def set_output_dimension(self, output_dim):
-    self.output_dimension = output_dim
+    self._output_dimension = output_dim
  def build(self, input_shapes):
    pass
-  def call(self, input_features, is_training, valid_context_size):
+  def call(self, input_features, context_features, valid_context_size):
    """Handles a call by performing attention."""
-    input_features, context_features = input_features
    _, context_size, _ = context_features.shape
    valid_mask = compute_valid_mask(valid_context_size, context_size)
    channels = input_features.shape[-1]
    #Build the feature projection layer
-    if (not self.output_dimension):
+    if (not self._output_dimension):
-      self.output_dimension = channels
+      self._output_dimension = channels
-    if (not self.feature_proj):
+    if (not self._feature_proj):
-      self.feature_proj = ContextProjection(self.output_dimension,
+      self._feature_proj = ContextProjection(self._output_dimension,
-                                            self.freeze_batchnorm)
+                                            self._freeze_batchnorm)
    # Average pools over height and width dimension so that the shape of
    # box_features becomes [batch_size, max_num_proposals, channels].
@@ -90,25 +89,25 @@ class AttentionBlock(tf.keras.layers.Layer):
    with tf.variable_scope("AttentionBlock"):
      queries = project_features(
-          input_features, self.bottleneck_dimension, is_training,
+          input_features, self._bottleneck_dimension, self._is_training,
-          self.query_proj, normalize=True)
+          self._query_proj, normalize=True)
      keys = project_features(
-          context_features, self.bottleneck_dimension, is_training,
+          context_features, self._bottleneck_dimension, self._is_training,
-          self.key_proj, normalize=True)
+          self._key_proj, normalize=True)
      values = project_features(
-          context_features, self.bottleneck_dimension, is_training,
+          context_features, self._bottleneck_dimension, self._is_training,
-          self.val_proj, normalize=True)
+          self._val_proj, normalize=True)
    weights = tf.matmul(queries, keys, transpose_b=True)
    weights, values = filter_weight_value(weights, values, valid_mask)
-    weights = tf.nn.softmax(weights / self.attention_temperature)
+    weights = tf.nn.softmax(weights / self._attention_temperature)
    features = tf.matmul(weights, values)
    output_features = project_features(
-        features, self.output_dimension, is_training,
+        features, self._output_dimension, self._is_training,
-        self.feature_proj, normalize=False)
+        self._feature_proj, normalize=False)
    output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]

--- a/research/object_detection/meta_architectures/context_rcnn_meta_arch.py
+++ b/research/object_detection/meta_architectures/context_rcnn_meta_arch.py
@@ -26,7 +26,8 @@ from __future__ import print_function
 import functools
 from object_detection.core import standard_fields as fields
-from object_detection.meta_architectures import context_rcnn_lib, context_rcnn_lib_v2
+from object_detection.meta_architectures import context_rcnn_lib
+from object_detection.meta_architectures import context_rcnn_lib_v2
 from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.utils import tf_version
@@ -271,10 +272,11 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
          attention_temperature=attention_temperature,
          is_training=is_training)
    else:
-      self._attention_block = context_rcnn_lib_v2.AttentionBlock(
+      self._context_feature_extract_fn = context_rcnn_lib_v2.AttentionBlock(
-          attention_bottleneck_dimension, attention_temperature,
+          bottleneck_dimension=attention_bottleneck_dimension,
-          freeze_batchnorm)
+          attention_temperature=attention_temperature,
-      self._is_training = is_training
+          freeze_batchnorm=freeze_batchnorm,
+          is_training=is_training)
  @staticmethod
  def get_side_inputs(features):
@@ -335,15 +337,10 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
        features_to_crop, proposal_boxes_normalized,
        [self._initial_crop_size, self._initial_crop_size])
-    if tf_version.is_tf1():
+    attention_features = self._context_feature_extract_fn(
-      attention_features = self._context_feature_extract_fn(
+        box_features=box_features,
-          box_features=box_features,
+        context_features=context_features,
-          context_features=context_features,
+        valid_context_size=valid_context_size)
-          valid_context_size=valid_context_size)
-    else:
-      attention_features = self._attention_block(
-          [box_features, context_features], self._is_training,
-          valid_context_size)
    # Adds box features with attention features.
    box_features += attention_features