Commit c81c01b2 authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

add back support for tf1

parent 40ea6deb
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for context_rcnn_lib."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import context_rcnn_lib_v1 as context_rcnn_lib
from object_detection.utils import test_case
from object_detection.utils import tf_version
_NEGATIVE_PADDING_VALUE = -100000
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
                         tf.test.TestCase):
  """Tests for the functions in context_rcnn_lib."""

  def test_compute_valid_mask(self):
    # Batch of two examples with 1 and 2 valid entries out of 3 padded slots.
    total_elements = tf.constant(3, tf.int32)
    valid_counts = tf.constant((1, 2), tf.int32)
    valid_mask = context_rcnn_lib.compute_valid_mask(valid_counts,
                                                     total_elements)
    self.assertAllEqual(valid_mask,
                        tf.constant([[1, 0, 0], [1, 1, 0]], tf.float32))

  def test_filter_weight_value(self):
    weights = 4 * tf.ones((2, 3, 2), tf.float32)
    values = tf.ones((2, 2, 4), tf.float32)
    padded = _NEGATIVE_PADDING_VALUE + 4
    # Each case: (valid_mask, expected weights, expected values). Invalid
    # entries get the large negative offset in weights and zeros in values.
    cases = [
        (tf.constant([[True, True], [True, False]], tf.bool),
         tf.constant([[[4, 4]] * 3, [[4, padded]] * 3]),
         tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                      [[1, 1, 1, 1], [0, 0, 0, 0]]])),
        (tf.constant([[True, True], [False, False]], tf.bool),
         tf.constant([[[4, 4]] * 3, [[padded, padded]] * 3]),
         tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                      [[0, 0, 0, 0], [0, 0, 0, 0]]])),
    ]
    for valid_mask, expected_weights, expected_values in cases:
      filtered_weights, filtered_values = (
          context_rcnn_lib.filter_weight_value(weights, values, valid_mask))
      self.assertAllEqual(filtered_weights, expected_weights)
      self.assertAllEqual(filtered_values, expected_values)

  @parameterized.parameters((2, True, True), (2, False, True),
                            (10, True, False), (10, False, False))
  def test_project_features(self, projection_dimension, is_training, normalize):
    features = tf.ones([2, 3, 4], tf.float32)
    projected = context_rcnn_lib.project_features(
        features,
        projection_dimension,
        is_training=is_training,
        normalize=normalize)
    # Only the output shape is checked; the projection weights are random.
    self.assertAllEqual(projected.shape, [2, 3, projection_dimension])

  @parameterized.parameters(
      (2, 10, 1),
      (3, 10, 2),
      (4, 20, 3),
      (5, 20, 4),
      (7, 20, 5),
  )
  def test_attention_block(self, bottleneck_dimension, output_dimension,
                           attention_temperature):
    input_features = tf.ones([2, 3, 4], tf.float32)
    context_features = tf.ones([2, 2, 3], tf.float32)
    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
    output_features = context_rcnn_lib.attention_block(
        input_features, context_features, bottleneck_dimension,
        output_dimension, attention_temperature, valid_mask,
        is_training=False)
    # Only the output shape is checked; the attention weights are random.
    self.assertAllEqual(output_features.shape, [2, 3, output_dimension])

  @parameterized.parameters(True, False)
  def test_compute_box_context_attention(self, is_training):
    box_features = tf.ones([2, 3, 4, 4, 4], tf.float32)
    context_features = tf.ones([2, 5, 6], tf.float32)
    valid_context_size = tf.constant((2, 3), tf.int32)
    attention_features = context_rcnn_lib.compute_box_context_attention(
        box_features, context_features, valid_context_size,
        bottleneck_dimension=10, attention_temperature=1,
        is_training=is_training)
    # Height/width are pooled away and re-expanded, giving
    # [batch, max_num_proposals, 1, 1, channels].
    self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4])
# Runs the TF test runner, which executes the test class above.
if __name__ == '__main__':
  tf.test.main()
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for context_rcnn_lib."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from absl.testing import parameterized
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import context_rcnn_lib_v2 as context_rcnn_lib
from object_detection.utils import test_case
from object_detection.utils import tf_version
_NEGATIVE_PADDING_VALUE = -100000
# The v2 library is Keras-based; mirror the TF1 twin of this test, which
# carries the corresponding skip decorator (this also uses the otherwise
# unused `unittest` and `tf_version` imports).
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase,
                         tf.test.TestCase):
  """Tests for the functions in context_rcnn_lib."""

  def test_compute_valid_mask(self):
    # Batch of two examples with 1 and 2 valid entries out of 3 padded slots.
    num_elements = tf.constant(3, tf.int32)
    num_valid_elementss = tf.constant((1, 2), tf.int32)
    valid_mask = context_rcnn_lib.compute_valid_mask(num_valid_elementss,
                                                     num_elements)
    expected_valid_mask = tf.constant([[1, 0, 0], [1, 1, 0]], tf.float32)
    self.assertAllEqual(valid_mask, expected_valid_mask)

  def test_filter_weight_value(self):
    weights = tf.ones((2, 3, 2), tf.float32) * 4
    values = tf.ones((2, 2, 4), tf.float32)
    valid_mask = tf.constant([[True, True], [True, False]], tf.bool)
    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
        weights, values, valid_mask)
    # Invalid entries get the large negative offset in the weights and are
    # zeroed out in the values.
    expected_weights = tf.constant([[[4, 4], [4, 4], [4, 4]],
                                    [[4, _NEGATIVE_PADDING_VALUE + 4],
                                     [4, _NEGATIVE_PADDING_VALUE + 4],
                                     [4, _NEGATIVE_PADDING_VALUE + 4]]])
    expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                                   [[1, 1, 1, 1], [0, 0, 0, 0]]])
    self.assertAllEqual(filtered_weights, expected_weights)
    self.assertAllEqual(filtered_values, expected_values)
    # Changes the valid_mask so the results will be different.
    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
    filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
        weights, values, valid_mask)
    expected_weights = tf.constant(
        [[[4, 4], [4, 4], [4, 4]],
         [[_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
          [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4],
          [_NEGATIVE_PADDING_VALUE + 4, _NEGATIVE_PADDING_VALUE + 4]]])
    expected_values = tf.constant([[[1, 1, 1, 1], [1, 1, 1, 1]],
                                   [[0, 0, 0, 0], [0, 0, 0, 0]]])
    self.assertAllEqual(filtered_weights, expected_weights)
    self.assertAllEqual(filtered_values, expected_values)

  @parameterized.parameters((2, True, True), (2, False, True),
                            (10, True, False), (10, False, False))
  def test_project_features(self, projection_dimension, is_training, normalize):
    features = tf.ones([2, 3, 4], tf.float32)
    # v2 project_features takes an explicit projection layer.
    projected_features = context_rcnn_lib.project_features(
        features,
        projection_dimension,
        is_training,
        context_rcnn_lib.ContextProjection(projection_dimension, False),
        normalize=normalize)
    # Makes sure the shape is correct.
    self.assertAllEqual(projected_features.shape, [2, 3, projection_dimension])

  @parameterized.parameters(
      (2, 10, 1),
      (3, 10, 2),
      (4, 20, 3),
      (5, 20, 4),
      (7, 20, 5),
  )
  def test_attention_block(self, bottleneck_dimension, output_dimension,
                           attention_temperature):
    input_features = tf.ones([2, 3, 4], tf.float32)
    context_features = tf.ones([2, 2, 3], tf.float32)
    valid_mask = tf.constant([[True, True], [False, False]], tf.bool)
    is_training = False
    # The block builds its own key/value/query projections; the output
    # (feature) projection dimension must be set before the first call.
    attention_block = context_rcnn_lib.AttentionBlock(
        bottleneck_dimension, attention_temperature, False)
    attention_block.set_output_dimension(output_dimension)
    output_features = attention_block([input_features, context_features],
                                      is_training, valid_mask)
    # Makes sure the shape is correct.
    self.assertAllEqual(output_features.shape, [2, 3, output_dimension])

  @parameterized.parameters(True, False)
  def test_compute_box_context_attention(self, is_training):
    box_features = tf.ones([2, 3, 4, 4, 4], tf.float32)
    context_features = tf.ones([2, 5, 6], tf.float32)
    valid_context_size = tf.constant((2, 3), tf.int32)
    bottleneck_dimension = 10
    attention_temperature = 1
    attention_features = context_rcnn_lib.compute_box_context_attention(
        box_features, context_features, valid_context_size,
        is_training,
        context_rcnn_lib.AttentionBlock(bottleneck_dimension,
                                        attention_temperature, False))
    # Makes sure the shape is correct.
    self.assertAllEqual(attention_features.shape, [2, 3, 1, 1, 4])
# Runs the TF test runner, which executes the test class above.
if __name__ == '__main__':
  tf.test.main()
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library functions for ContextRCNN."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
import tf_slim as slim
# The negative value used in padding the invalid weights.
_NEGATIVE_PADDING_VALUE = -100000
def filter_weight_value(weights, values, valid_mask):
  """Masks out invalid context entries in attention weights and values.

  Invalid weight entries are shifted by _NEGATIVE_PADDING_VALUE so they
  contribute (effectively) nothing to a subsequent softmax; invalid value
  entries are zeroed.

  Args:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].
    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True
      means valid and False means invalid.

  Returns:
    The filtered (weights, values) pair, same shapes as the inputs.

  Raises:
    ValueError: If the tensor shapes are inconsistent with each other.
  """
  w_batch, _, w_ctx = weights.shape
  v_batch, v_ctx, _ = values.shape
  m_batch, m_ctx = valid_mask.shape
  # Shape consistency checks across all three inputs.
  if w_batch != v_batch or v_batch != m_batch:
    raise ValueError("Please make sure the first dimension of the input"
                     " tensors are the same.")
  if w_ctx != v_ctx:
    raise ValueError("Please make sure the third dimension of weights matches"
                     " the second dimension of values.")
  if w_ctx != m_ctx:
    raise ValueError("Please make sure the third dimension of the weights"
                     " matches the second dimension of the valid_mask.")
  expanded_mask = valid_mask[..., tf.newaxis]
  # Push invalid weights far negative so softmax assigns them ~0 probability.
  invalid = tf.cast(tf.math.logical_not(expanded_mask), weights.dtype)
  weights += tf.transpose(invalid * _NEGATIVE_PADDING_VALUE, perm=[0, 2, 1])
  # Zero out the values of invalid context entries.
  values *= tf.cast(expanded_mask, values.dtype)
  return weights, values
def compute_valid_mask(num_valid_elements, num_elements):
  """Computes mask of valid entries within padded context feature.

  Args:
    num_valid_elements: A int32 Tensor of shape [batch_size].
    num_elements: An int32 Tensor giving the padded length.

  Returns:
    A boolean Tensor of the shape [batch_size, num_elements]. True means
    valid and False means invalid.
  """
  batch_size = num_valid_elements.shape[0]
  # Broadcast per-slot indices across the batch and compare each index
  # against that example's valid count.
  slot_indices = tf.tile(
      tf.range(num_elements, dtype=tf.int32)[tf.newaxis, ...], [batch_size, 1])
  return tf.less(slot_indices, num_valid_elements[..., tf.newaxis])
def project_features(features, projection_dimension, is_training, normalize):
  """Projects features to another feature space.

  Args:
    features: A float Tensor of shape [batch_size, features_size,
      num_features].
    projection_dimension: A int32 Tensor.
    is_training: A boolean Tensor (affecting batch normalization).
    normalize: A boolean Tensor. If true, the output features will be l2
      normalized on the last dimension.

  Returns:
    A float Tensor of shape [batch, features_size, projection_dimension].
  """
  # TODO(guanhangwu) Figure out a better way of specifying the batch norm
  # params.
  batch_norm_params = {
      "is_training": is_training,
      "decay": 0.97,
      "epsilon": 0.001,
      "center": True,
      "scale": True
  }
  batch_size, _, num_features = features.shape
  # Collapse the batch and features_size dimensions so one fully connected
  # layer projects every feature vector at once.
  features = tf.reshape(features, [-1, num_features])
  projected_features = slim.fully_connected(
      features,
      num_outputs=projection_dimension,
      activation_fn=tf.nn.relu6,
      normalizer_fn=slim.batch_norm,
      normalizer_params=batch_norm_params)
  # Restore the [batch_size, features_size, ...] layout.
  projected_features = tf.reshape(projected_features,
                                  [batch_size, -1, projection_dimension])
  if normalize:
    projected_features = tf.math.l2_normalize(projected_features, axis=-1)
  return projected_features
def attention_block(input_features, context_features, bottleneck_dimension,
                    output_dimension, attention_temperature, valid_mask,
                    is_training):
  """Generic attention block.

  Args:
    input_features: A float Tensor of shape [batch_size, input_size,
      num_input_features].
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
      for intermediate projections.
    output_dimension: A int32 Tensor representing the last dimension of the
      output feature.
    attention_temperature: A float Tensor. It controls the temperature of the
      softmax for weights calculation. The formula for calculation as follows:
        weights = exp(weights / temperature) / sum(exp(weights / temperature))
    valid_mask: A boolean Tensor of shape [batch_size, context_size].
    is_training: A boolean Tensor (affecting batch normalization).

  Returns:
    A float Tensor of shape [batch_size, input_size, output_dimension].
  """
  with tf.variable_scope("AttentionBlock"):
    # Project inputs and context into a shared, l2-normalized bottleneck
    # space before computing attention weights.
    queries = project_features(
        input_features, bottleneck_dimension, is_training, normalize=True)
    keys = project_features(
        context_features, bottleneck_dimension, is_training, normalize=True)
    values = project_features(
        context_features, bottleneck_dimension, is_training, normalize=True)
    # Dot-product attention; padded context entries are masked out of both
    # the weights and the values before the softmax.
    weights = tf.matmul(queries, keys, transpose_b=True)
    weights, values = filter_weight_value(weights, values, valid_mask)
    weights = tf.nn.softmax(weights / attention_temperature)
    features = tf.matmul(weights, values)
    # Final un-normalized projection into the requested output dimension.
    output_features = project_features(
        features, output_dimension, is_training, normalize=False)
  return output_features
def compute_box_context_attention(box_features, context_features,
                                  valid_context_size, bottleneck_dimension,
                                  attention_temperature, is_training):
  """Computes the attention feature from the context given a batch of box.

  Args:
    box_features: A float Tensor of shape [batch_size, max_num_proposals,
      height, width, channels]. It is pooled features from first stage
      proposals.
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    valid_context_size: A int32 Tensor of shape [batch_size].
    bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
      for intermediate projections.
    attention_temperature: A float Tensor. It controls the temperature of the
      softmax for weights calculation. The formula for calculation as follows:
        weights = exp(weights / temperature) / sum(exp(weights / temperature))
    is_training: A boolean Tensor (affecting batch normalization).

  Returns:
    A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
  """
  _, context_size, _ = context_features.shape
  valid_mask = compute_valid_mask(valid_context_size, context_size)
  channels = box_features.shape[-1]
  # Average pools over height and width dimension so that the shape of
  # box_features becomes [batch_size, max_num_proposals, channels].
  box_features = tf.reduce_mean(box_features, [2, 3])
  # `channels.value` converts the static tf.Dimension to a plain int for use
  # as the attention output dimension.
  output_features = attention_block(box_features, context_features,
                                    bottleneck_dimension, channels.value,
                                    attention_temperature, valid_mask,
                                    is_training)
  # Expands the dimension back to match with the original feature map.
  output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]
  return output_features
\ No newline at end of file
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library functions for ContextRCNN."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
# The negative value used in padding the invalid weights.
_NEGATIVE_PADDING_VALUE = -100000
# Names identifying the individual projection roles inside an attention
# block: key/value/query projections plus the final feature projection.
KEY_NAME = 'key'
VALUE_NAME = 'val'
QUERY_NAME = 'query'
FEATURE_NAME = 'feature'
class ContextProjection(tf.keras.layers.Layer):
  """Custom layer to do batch normalization and projection."""

  def __init__(self, projection_dimension, freeze_batchnorm, **kwargs):
    # Batch norm is applied before the dense projection; freezing makes the
    # batch norm layer (including its moving statistics) non-trainable.
    self.batch_norm = tf.keras.layers.BatchNormalization(
        epsilon=0.001,
        center=True,
        scale=True,
        momentum=0.97,
        trainable=(not freeze_batchnorm))
    self.projection = tf.keras.layers.Dense(units=projection_dimension,
                                            activation=tf.nn.relu6,
                                            use_bias=True)
    super(ContextProjection, self).__init__(**kwargs)

  def build(self, input_shape):
    # Both sublayers are built against the same input shape (batch norm
    # preserves shape, so the dense layer sees the same last dimension).
    self.batch_norm.build(input_shape)
    self.projection.build(input_shape)

  def call(self, input_features, is_training=False):
    # NOTE(review): is_training is forwarded positionally as the batch norm
    # layer's `training` argument — confirm against the Keras call contract.
    return self.projection(self.batch_norm(input_features, is_training))
class AttentionBlock(tf.keras.layers.Layer):
  """Custom layer to perform all attention."""

  def __init__(self, bottleneck_dimension, attention_temperature,
               freeze_batchnorm, output_dimension=None, **kwargs):
    # Separate key/value/query projections, all into the same bottleneck
    # dimension.
    self.key_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
    self.val_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
    self.query_proj = ContextProjection(bottleneck_dimension, freeze_batchnorm)
    self.attention_temperature = attention_temperature
    self.freeze_batchnorm = freeze_batchnorm
    self.bottleneck_dimension = bottleneck_dimension
    # output_dimension may be deferred and supplied later through
    # set_output_dimension (e.g. once the box feature channel count is known).
    if output_dimension:
      self.output_dimension = output_dimension
    super(AttentionBlock, self).__init__(**kwargs)

  def set_output_dimension(self, new_output_dimension):
    # Must be called before the layer is built if output_dimension was not
    # given to the constructor; build() reads self.output_dimension.
    self.output_dimension = new_output_dimension

  def build(self, input_shapes):
    # The final (feature) projection is created lazily here so it can use an
    # output_dimension that was set after construction.
    self.feature_proj = ContextProjection(self.output_dimension,
                                          self.freeze_batchnorm)

  def call(self, input_features, is_training, valid_mask):
    """Handles a call by performing attention"""
    # input_features is a two-element sequence: the per-box input features
    # and the padded context features.
    input_features, context_features = input_features
    with tf.variable_scope("AttentionBlock"):
      queries = project_features(
          input_features, self.bottleneck_dimension, is_training,
          self.query_proj, normalize=True)
      keys = project_features(
          context_features, self.bottleneck_dimension, is_training,
          self.key_proj, normalize=True)
      values = project_features(
          context_features, self.bottleneck_dimension, is_training,
          self.val_proj, normalize=True)
      # Dot-product attention; padded context entries are masked out of both
      # the weights and the values before the softmax.
      weights = tf.matmul(queries, keys, transpose_b=True)
      weights, values = filter_weight_value(weights, values, valid_mask)
      weights = tf.nn.softmax(weights / self.attention_temperature)
      features = tf.matmul(weights, values)
      # Final un-normalized projection into the output dimension.
      output_features = project_features(
          features, self.output_dimension, is_training,
          self.feature_proj, normalize=False)
    return output_features
def filter_weight_value(weights, values, valid_mask):
  """Filters weights and values based on valid_mask.

  _NEGATIVE_PADDING_VALUE will be added to invalid elements in the weights to
  avoid their contribution in softmax. 0 will be set for the invalid elements
  in the values.

  Args:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].
    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True
      means valid and False means invalid.

  Returns:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].

  Raises:
    ValueError: If the tensor shapes are inconsistent with each other.
  """
  w_batch_size, _, w_context_size = weights.shape
  v_batch_size, v_context_size, _ = values.shape
  m_batch_size, m_context_size = valid_mask.shape
  # Shape consistency checks across all three inputs.
  if w_batch_size != v_batch_size or v_batch_size != m_batch_size:
    raise ValueError("Please make sure the first dimension of the input"
                     " tensors are the same.")
  if w_context_size != v_context_size:
    raise ValueError("Please make sure the third dimension of weights matches"
                     " the second dimension of values.")
  if w_context_size != m_context_size:
    raise ValueError("Please make sure the third dimension of the weights"
                     " matches the second dimension of the valid_mask.")
  valid_mask = valid_mask[..., tf.newaxis]
  # Force the invalid weights to be very negative so it won't contribute to
  # the softmax.
  weights += tf.transpose(
      tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
      _NEGATIVE_PADDING_VALUE,
      perm=[0, 2, 1])
  # Force the invalid values to be 0.
  values *= tf.cast(valid_mask, values.dtype)
  return weights, values
def project_features(features, bottleneck_dimension, is_training,
                     layer, normalize=True):
  """Projects features to another feature space using the given layer.

  Args:
    features: A float Tensor of shape [batch_size, features_size,
      num_features].
    bottleneck_dimension: A int32 Tensor; the dimension the provided layer
      projects into (used to reshape the layer's output).
    is_training: A boolean Tensor (affecting batch normalization).
    layer: A ContextProjection layer specific to the particular operation
      being performed (key, value, query, feature).
    normalize: A boolean Tensor. If true, the output features will be l2
      normalized on the last dimension.

  Returns:
    A float Tensor of shape [batch, features_size, bottleneck_dimension].
  """
  shape_arr = features.shape
  batch_size, _, num_features = shape_arr
  # Collapse the batch and features_size dimensions so the projection layer
  # sees a single matrix of feature vectors.
  features = tf.reshape(features, [-1, num_features])
  projected_features = layer(features, is_training)
  # Restore the [batch_size, features_size, ...] layout.
  projected_features = tf.reshape(projected_features,
                                  [batch_size, -1, bottleneck_dimension])
  if normalize:
    projected_features = tf.keras.backend.l2_normalize(projected_features,
                                                       axis=-1)
  return projected_features
def compute_valid_mask(num_valid_elements, num_elements):
  """Computes mask of valid entries within padded context feature.

  Args:
    num_valid_elements: A int32 Tensor of shape [batch_size].
    num_elements: An int32 Tensor giving the padded length.

  Returns:
    A boolean Tensor of the shape [batch_size, num_elements]. True means
    valid and False means invalid.
  """
  batch_size = num_valid_elements.shape[0]
  # Per-slot indices, broadcast across the batch.
  element_idxs = tf.range(num_elements, dtype=tf.int32)
  batch_element_idxs = tf.tile(element_idxs[tf.newaxis, ...], [batch_size, 1])
  num_valid_elements = num_valid_elements[..., tf.newaxis]
  # A slot is valid iff its index is below that example's valid count.
  valid_mask = tf.less(batch_element_idxs, num_valid_elements)
  return valid_mask
def compute_box_context_attention(box_features, context_features,
                                  valid_context_size, is_training,
                                  attention_block):
  """Computes the attention feature from the context given a batch of box.

  Args:
    box_features: A float Tensor of shape [batch_size, max_num_proposals,
      height, width, channels]. It is pooled features from first stage
      proposals.
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    valid_context_size: A int32 Tensor of shape [batch_size].
    is_training: A boolean Tensor (affecting batch normalization).
    attention_block: An AttentionBlock layer that performs the attention
      computation; its output dimension is set here to match the box
      feature channels.

  Returns:
    A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
  """
  _, context_size, _ = context_features.shape
  valid_mask = compute_valid_mask(valid_context_size, context_size)
  channels = box_features.shape[-1]
  # The attention output must match the box feature channels so the caller
  # can add the two feature maps together.
  # NOTE(review): under tf.compat.v1, `channels` may be a tf.Dimension rather
  # than a plain int (the TF1 library uses `.value`) — confirm.
  attention_block.set_output_dimension(channels)
  # Average pools over height and width dimension so that the shape of
  # box_features becomes [batch_size, max_num_proposals, channels].
  box_features = tf.reduce_mean(box_features, [2, 3])
  output_features = attention_block([box_features, context_features],
                                    is_training, valid_mask)
  # Expands the dimension back to match with the original feature map.
  output_features = output_features[:, :, tf.newaxis, tf.newaxis, :]
  return output_features
......@@ -26,8 +26,9 @@ from __future__ import print_function
import functools
from object_detection.core import standard_fields as fields
from object_detection.meta_architectures import context_rcnn_lib
from object_detection.meta_architectures import context_rcnn_lib_v1, context_rcnn_lib_v2
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.utils import tf_version
class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
......@@ -265,14 +266,18 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
return_raw_detections_during_predict),
output_final_box_features=output_final_box_features)
if tf_version.is_tf1():
self._context_feature_extract_fn = functools.partial(
context_rcnn_lib.compute_box_context_attention,
context_rcnn_lib_v1.compute_box_context_attention,
bottleneck_dimension=attention_bottleneck_dimension,
attention_temperature=attention_temperature,
is_training=is_training)
else:
self._context_feature_extract_fn = functools.partial(
context_rcnn_lib_v2.compute_box_context_attention,
is_training=is_training,
freeze_batchnorm=freeze_batchnorm)
self._attention_block = context_rcnn_lib.AttentionBlock(attention_bottleneck_dimension, attention_temperature, freeze_batchnorm)
attention_block=context_rcnn_lib_v2.AttentionBlock(
attention_bottleneck_dimension, attention_temperature, freeze_batchnorm))
@staticmethod
def get_side_inputs(features):
......@@ -335,8 +340,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
attention_features = self._context_feature_extract_fn(
box_features=box_features,
context_features=context_features,
valid_context_size=valid_context_size,
attention_block=self._attention_block)
valid_context_size=valid_context_size)
# Adds box features with attention features.
box_features += attention_features
......
......@@ -438,15 +438,16 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
masks_are_class_agnostic=masks_are_class_agnostic,
share_box_across_classes=share_box_across_classes), **common_kwargs)
@mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib')
def test_prediction_mock(self, mock_context_rcnn_lib):
"""Mocks the context_rcnn_lib module to test the prediction.
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF2.X only test.')
@mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib_v1')
def test_prediction_mock_tf1(self, mock_context_rcnn_lib_v1):
"""Mocks the context_rcnn_lib_v1 module to test the prediction.
Using mock object so that we can ensure compute_box_context_attention is
called in side the prediction function.
Args:
mock_context_rcnn_lib: mock module for the context_rcnn_lib.
mock_context_rcnn_lib_v1: mock module for the context_rcnn_lib_v1.
"""
model = self._build_model(
is_training=False,
......@@ -455,7 +456,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
num_classes=42)
mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32)
mock_context_rcnn_lib.compute_box_context_attention.return_value = mock_tensor
mock_context_rcnn_lib_v1.compute_box_context_attention.return_value = mock_tensor
inputs_shape = (2, 20, 20, 3)
inputs = tf.cast(
tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
......@@ -477,7 +478,49 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
side_inputs = model.get_side_inputs(features)
_ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs)
mock_context_rcnn_lib.compute_box_context_attention.assert_called_once()
mock_context_rcnn_lib_v1.compute_box_context_attention.assert_called_once()
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF1.X only test.')
@mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib_v2')
def test_prediction_mock_tf2(self, mock_context_rcnn_lib_v2):
"""Mocks the context_rcnn_lib_v2 module to test the prediction.
Using mock object so that we can ensure compute_box_context_attention is
called in side the prediction function.
Args:
mock_context_rcnn_lib_v2: mock module for the context_rcnn_lib_v2.
"""
model = self._build_model(
is_training=False,
number_of_stages=2,
second_stage_batch_size=6,
num_classes=42)
mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32)
mock_context_rcnn_lib_v2.compute_box_context_attention.return_value = mock_tensor
inputs_shape = (2, 20, 20, 3)
inputs = tf.cast(
tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
dtype=tf.float32)
preprocessed_inputs, true_image_shapes = model.preprocess(inputs)
context_features = tf.random_uniform((2, 20, 10),
minval=0,
maxval=255,
dtype=tf.float32)
valid_context_size = tf.random_uniform((2,),
minval=0,
maxval=10,
dtype=tf.int32)
features = {
fields.InputDataFields.context_features: context_features,
fields.InputDataFields.valid_context_size: valid_context_size
}
side_inputs = model.get_side_inputs(features)
_ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs)
mock_context_rcnn_lib_v2.compute_box_context_attention.assert_called_once()
@parameterized.named_parameters(
{'testcase_name': 'static_shapes', 'static_shapes': True},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment