work on fixing

35fda973 · Kaushik Shivakumar · 66e8a904 · 35fda973 · 35fda973
Commit 35fda973 authored Jun 30, 2020 by Kaushik Shivakumar
2 changed files
--- a/research/object_detection/meta_architectures/context_rcnn_lib.py
+++ b/research/object_detection/meta_architectures/context_rcnn_lib.py
@@ -50,130 +50,136 @@ class ContextProjection(tf.keras.layers.Layer):
    return self.projection(self.batch_norm(input_features, is_training))
 class AttentionBlock(tf.keras.layers.Layer):
-  def __init__(self, bottleneck_dimension, attention_temperature, freeze_batchnorm, **kwargs):
+  def __init__(self, bottleneck_dimension, attention_temperature, freeze_batchnorm, output_dimension=None, **kwargs):
    self.key_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
    self.val_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
    self.query_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
    self.attention_temperature = attention_temperature
    self.freeze_batchnorm = freeze_batchnorm
    self.bottleneck_dimension = bottleneck_dimension
+    if output_dimension:
+      self.output_dimension = output_dimension
    super(AttentionBlock, self).__init__(**kwargs)
+  def set_output_dimension(self, new_output_dimension):
+    self.output_dimension = new_output_dimension
  def build(self, input_shapes):
-    self.feature_proj = ContextProjection(input_shapes[0][-1], self.freeze_batchnorm)
+    print(input_shapes)
+    self.feature_proj = ContextProjection(self.output_dimension, self.freeze_batchnorm)
    #self.key_proj.build(input_shapes[0])
    #self.val_proj.build(input_shapes[0])
    #self.query_proj.build(input_shapes[0])
    #self.feature_proj.build(input_shapes[0])
    pass
-  def filter_weight_value(self, weights, values, valid_mask):
+  def call(self, input_features, is_training, valid_mask):
-    """Filters weights and values based on valid_mask.
+    input_features, context_features = input_features
+    with tf.variable_scope("AttentionBlock"):
+      queries = project_features(
+          input_features, self.bottleneck_dimension, is_training,
+          self.query_proj, normalize=True)
+      keys = project_features(
+          context_features, self.bottleneck_dimension, is_training,
+          self.key_proj, normalize=True)
+      values = project_features(
+          context_features, self.bottleneck_dimension, is_training,
+          self.val_proj, normalize=True)
-    _NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to
+    weights = tf.matmul(queries, keys, transpose_b=True)
-    avoid their contribution in softmax. 0 will be set for the invalid elements in
-    the values.
-    Args:
+    weights, values = filter_weight_value(weights, values, valid_mask)
-      weights: A float Tensor of shape [batch_size, input_size, context_size].
-      values: A float Tensor of shape [batch_size, context_size,
-        projected_dimension].
-      valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
-        valid and False means invalid.
-    Returns:
+    weights = tf.nn.softmax(weights / self.attention_temperature)
-      weights: A float Tensor of shape [batch_size, input_size, context_size].
-      values: A float Tensor of shape [batch_size, context_size,
-        projected_dimension].
-    Raises:
+    features = tf.matmul(weights, values)
-      ValueError: If shape of doesn't match.
+    output_features = project_features(
-    """
+        features, self.output_dimension, is_training,
-    w_batch_size, _, w_context_size = weights.shape
+        self.feature_proj, normalize=False)
-    v_batch_size, v_context_size, _ = values.shape
+    return output_features
-    m_batch_size, m_context_size = valid_mask.shape
-    if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
-      raise ValueError("Please make sure the first dimension of the input"
-                      " tensors are the same.")
-    if w_context_size != v_context_size:
-      raise ValueError("Please make sure the third dimension of weights matches"
-                      " the second dimension of values.")
-    if w_context_size != m_context_size:
+def filter_weight_value(weights, values, valid_mask):
-      raise ValueError("Please make sure the third dimension of the weights"
+  """Filters weights and values based on valid_mask.
-                      " matches the second dimension of the valid_mask.")
-    valid_mask = valid_mask[..., tf.newaxis]
+  _NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to
+  avoid their contribution in softmax. 0 will be set for the invalid elements in
+  the values.
-    # Force the invalid weights to be very negative so it won't contribute to
+  Args:
-    # the softmax.
+    weights: A float Tensor of shape [batch_size, input_size, context_size].
-    weights += tf.transpose(
+    values: A float Tensor of shape [batch_size, context_size,
-        tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
+      projected_dimension].
-        _NEGATIVE_PADDING_VALUE,
+    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
-        perm=[0, 2, 1])
+      valid and False means invalid.
-    # Force the invalid values to be 0.
+  Returns:
-    values *= tf.cast(valid_mask, values.dtype)
+    weights: A float Tensor of shape [batch_size, input_size, context_size].
+    values: A float Tensor of shape [batch_size, context_size,
+      projected_dimension].
-    return weights, values
+  Raises:
+    ValueError: If the batch or context dimensions of the inputs don't match.
+  """
+  w_batch_size, _, w_context_size = weights.shape
+  v_batch_size, v_context_size, _ = values.shape
+  m_batch_size, m_context_size = valid_mask.shape
+  if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
+    raise ValueError("Please make sure the first dimension of the input"
+                    " tensors are the same.")
-  def run_projection(self, features, bottleneck_dimension, is_training, layer, normalize=True):
+  if w_context_size != v_context_size:
-    """Projects features to another feature space.
+    raise ValueError("Please make sure the third dimension of weights matches"
+                    " the second dimension of values.")
-    Args:
+  if w_context_size != m_context_size:
-      features: A float Tensor of shape [batch_size, features_size,
+    raise ValueError("Please make sure the third dimension of the weights"
-        num_features].
+                    " matches the second dimension of the valid_mask.")
-      projection_dimension: A int32 Tensor.
-      is_training: A boolean Tensor (affecting batch normalization).
-      node: Contains a custom layer specific to the particular operation
-            being performed (key, value, query, features)
-      normalize: A boolean Tensor. If true, the output features will be l2
-        normalized on the last dimension.
-    Returns:
+  valid_mask = valid_mask[..., tf.newaxis]
-      A float Tensor of shape [batch, features_size, projection_dimension].
-    """
-    shape_arr = features.shape
-    batch_size, _, num_features = shape_arr
-    print("Orig", features.shape)
-    features = tf.reshape(features, [-1, num_features])
-    projected_features = layer(features, is_training)
+  # Force the invalid weights to be very negative so it won't contribute to
+  # the softmax.
+  weights += tf.transpose(
+      tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
+      _NEGATIVE_PADDING_VALUE,
+      perm=[0, 2, 1])
-    projected_features = tf.reshape(projected_features, [batch_size, -1, bottleneck_dimension])
+  # Force the invalid values to be 0.
-    print(projected_features.shape)
+  values *= tf.cast(valid_mask, values.dtype)
-    if normalize:
+  return weights, values
-      projected_features = tf.keras.backend.l2_normalize(projected_features, axis=-1)
-    return projected_features
+def project_features(features, bottleneck_dimension, is_training, layer, normalize=True):
+  """Projects features to another feature space.
-  def call(self, input_features, is_training, valid_mask):
+  Args:
-    input_features, context_features = input_features
+    features: A float Tensor of shape [batch_size, features_size,
-    with tf.variable_scope("AttentionBlock"):
+      num_features].
-      queries = self.run_projection(
+    bottleneck_dimension: An int32 Tensor, the projection dimension (size of the last dimension of the output).
-          input_features, self.bottleneck_dimension, is_training,
+    is_training: A boolean Tensor (affecting batch normalization).
-          self.query_proj, normalize=True)
+    layer: Contains a custom layer specific to the particular operation
-      keys = self.run_projection(
+          being performed (key, value, query, features)
-          context_features, self.bottleneck_dimension, is_training,
+    normalize: A boolean Tensor. If true, the output features will be l2
-          self.key_proj, normalize=True)
+      normalized on the last dimension.
-      values = self.run_projection(
-          context_features, self.bottleneck_dimension, is_training,
-          self.val_proj, normalize=True)
-    weights = tf.matmul(queries, keys, transpose_b=True)
+  Returns:
+    A float Tensor of shape [batch, features_size, projection_dimension].
+  """
+  shape_arr = features.shape
+  batch_size, _, num_features = shape_arr
+  print("Orig", features.shape)
+  features = tf.reshape(features, [-1, num_features])
-    weights, values = self.filter_weight_value(weights, values, valid_mask)
+  projected_features = layer(features, is_training)
-    weights = tf.nn.softmax(weights / self.attention_temperature)
+  projected_features = tf.reshape(projected_features, [batch_size, -1, bottleneck_dimension])
+  print(projected_features.shape)
-    features = tf.matmul(weights, values)
+  if normalize:
-    output_features = self.run_projection(
+    projected_features = tf.keras.backend.l2_normalize(projected_features, axis=-1)
-        features, input_features.shape[-1], is_training,
-        self.feature_proj, normalize=False)
-    return output_features
+  return projected_features
 def compute_valid_mask(num_valid_elements, num_elements):
    """Computes mask of valid entries within padded context feature.
@@ -222,6 +228,7 @@ def compute_box_context_attention(box_features, context_features,
  valid_mask = compute_valid_mask(valid_context_size, context_size)
  channels = box_features.shape[-1]
+  attention_block.set_output_dimension(channels)
  # Average pools over height and width dimension so that the shape of
  # box_features becomes [batch_size, max_num_proposals, channels].

--- a/research/object_detection/meta_architectures/context_rcnn_lib_test.py
+++ b/research/object_detection/meta_architectures/context_rcnn_lib_test.py
@@ -80,9 +80,9 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
    projected_features = context_rcnn_lib.project_features(
        features,
        projection_dimension,
-        is_training=is_training,
+        is_training,
-        normalize=normalize,
+        context_rcnn_lib.ContextProjection(projection_dimension, False),
-        node=context_rcnn_lib.ContextProjection(projection_dimension, False))
+        normalize=normalize)
    # Makes sure the shape is correct.
    self.assertAllEqual(projected_features.shape, [2, 3, projection_dimension])
@@ -100,15 +100,15 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
    context_features = tf.ones([2, 2, 3], tf.float32)
    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
    is_training = False
-    projection_layers = {context_rcnn_lib.KEY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False), context_rcnn_lib.VALUE_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False),
+    #projection_layers = {context_rcnn_lib.KEY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False), context_rcnn_lib.VALUE_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False),
-                         context_rcnn_lib.QUERY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False)}
+    #                     context_rcnn_lib.QUERY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False)}
    #Add in the feature layer because this is further down the pipeline and it isn't automatically injected.
-    projection_layers['feature'] = context_rcnn_lib.ContextProjection(output_dimension, False)
+    #projection_layers['feature'] = context_rcnn_lib.ContextProjection(output_dimension, False)
-    output_features = context_rcnn_lib.attention_block(
+    attention_block = context_rcnn_lib.AttentionBlock(bottleneck_dimension, attention_temperature, False)
-        input_features, context_features, bottleneck_dimension,
+    attention_block.set_output_dimension(output_dimension)
-        output_dimension, attention_temperature, valid_mask, is_training, projection_layers)
+    output_features = attention_block([input_features, context_features], is_training, valid_mask)
    # Makes sure the shape is correct.
    self.assertAllEqual(output_features.shape, [2, 3, output_dimension])
@@ -120,12 +120,11 @@ class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
    valid_context_size = tf.constant((2, 3), tf.int32)
    bottleneck_dimension = 10
    attention_temperature = 1
-    projection_layers = {context_rcnn_lib.KEY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False), context_rcnn_lib.VALUE_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False),
-                         context_rcnn_lib.QUERY_NAME: context_rcnn_lib.ContextProjection(bottleneck_dimension, False)}
    attention_features = context_rcnn_lib.compute_box_context_attention(
        box_features, context_features, valid_context_size,
        bottleneck_dimension, attention_temperature, is_training, 
-        False, projection_layers)
+        False, context_rcnn_lib.AttentionBlock(bottleneck_dimension, attention_temperature, False))
    # Makes sure the shape is correct.
    self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4])