Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
2ae9c3a6
Commit
2ae9c3a6
authored
Jul 23, 2020
by
TF Object Detection Team
Browse files
Merge pull request #8749 from kmindspark:context_tf2
PiperOrigin-RevId: 322801804
parents
5af2c9d4
b4aa41f5
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
380 additions
and
13 deletions
+380
-13
research/object_detection/meta_architectures/context_rcnn_lib_tf2.py
...ject_detection/meta_architectures/context_rcnn_lib_tf2.py
+238
-0
research/object_detection/meta_architectures/context_rcnn_lib_tf2_test.py
...detection/meta_architectures/context_rcnn_lib_tf2_test.py
+120
-0
research/object_detection/meta_architectures/context_rcnn_meta_arch.py
...ct_detection/meta_architectures/context_rcnn_meta_arch.py
+14
-5
research/object_detection/meta_architectures/context_rcnn_meta_arch_test.py
...tection/meta_architectures/context_rcnn_meta_arch_test.py
+6
-7
research/object_detection/model_lib_v2.py
research/object_detection/model_lib_v2.py
+2
-1
No files found.
research/object_detection/meta_architectures/context_rcnn_lib_tf2.py
0 → 100644
View file @
2ae9c3a6
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library functions for Context R-CNN."""
import
tensorflow
as
tf
from
object_detection.core
import
freezable_batch_norm
# Large negative value added to the attention weights of invalid (padded)
# context entries so that they contribute almost nothing to the softmax.
_NEGATIVE_PADDING_VALUE = -100000
class ContextProjection(tf.keras.layers.Layer):
  """Keras layer that batch-normalizes its input and then projects it.

  The input goes through a `FreezableBatchNorm` and the normalized result is
  fed to a dense layer with a ReLU6 activation.
  """

  def __init__(self, projection_dimension, **kwargs):
    """Creates the batch norm and dense sub-layers.

    Args:
      projection_dimension: Number of units of the dense projection.
      **kwargs: Additional keyword arguments forwarded to the Keras `Layer`
        constructor.
    """
    batch_norm_params = dict(
        epsilon=0.001,
        center=True,
        scale=True,
        momentum=0.97,
        trainable=True)
    self.batch_norm = freezable_batch_norm.FreezableBatchNorm(
        **batch_norm_params)
    self.projection = tf.keras.layers.Dense(
        units=projection_dimension,
        activation=tf.nn.relu6,
        use_bias=True)
    super().__init__(**kwargs)

  def build(self, input_shape):
    """Builds both sub-layers for the given input shape.

    Args:
      input_shape: Shape of the features this layer will be called on.
    """
    # Batch norm is built first, then the projection, both on the same shape.
    for sub_layer in (self.batch_norm, self.projection):
      sub_layer.build(input_shape)

  def call(self, input_features, is_training=False):
    """Applies batch normalization followed by the dense projection.

    Args:
      input_features: Features to normalize and project.
      is_training: Passed as the second positional argument to the batch norm
        layer.

    Returns:
      The projected features.
    """
    normalized_features = self.batch_norm(input_features, is_training)
    return self.projection(normalized_features)
class AttentionBlock(tf.keras.layers.Layer):
  """Keras layer that attends from box features to context features."""

  def __init__(self, bottleneck_dimension, attention_temperature,
               output_dimension=None, is_training=False,
               name='AttentionBlock', **kwargs):
    """Constructs an attention block.

    Args:
      bottleneck_dimension: Size of the intermediate query/key/value
        projections.
      attention_temperature: Temperature of the softmax used to turn the
        query/key similarities into attention weights:
        weights = exp(weights / temperature) / sum(exp(weights / temperature))
      output_dimension: Size of the last dimension of the output features. If
        falsy, it is taken from the last dimension of the input shape when the
        layer is built.
      is_training: Forwarded to every projection layer (affects batch
        normalization).
      name: Name given to this Keras layer.
      **kwargs: Additional keyword arguments forwarded to the Keras `Layer`
        constructor.
    """
    # Keep the key, value, query construction order: Keras derives the default
    # sub-layer names from it.
    self._key_proj = ContextProjection(bottleneck_dimension)
    self._val_proj = ContextProjection(bottleneck_dimension)
    self._query_proj = ContextProjection(bottleneck_dimension)
    self._attention_temperature = attention_temperature
    self._bottleneck_dimension = bottleneck_dimension
    self._is_training = is_training
    self._output_dimension = output_dimension
    # Without an explicit output size the output projection is created later,
    # in build(), once the input shape is known.
    self._feature_proj = (
        ContextProjection(output_dimension) if output_dimension else None)
    super().__init__(name=name, **kwargs)

  def build(self, input_shapes):
    """Finishes building the attention block.

    Args:
      input_shapes: The shape of the primary input box features. Its last
        entry is used as the output dimension when none was given to the
        constructor.
    """
    if self._feature_proj:
      return
    self._output_dimension = input_shapes[-1]
    self._feature_proj = ContextProjection(self._output_dimension)

  def call(self, box_features, context_features, valid_context_size):
    """Handles a call by performing attention.

    Args:
      box_features: A float Tensor whose axes 2 and 3 are averaged away before
        attention, leaving [batch_size, max_num_proposals, channels]. The
        original comment describes these axes as height and width.
      context_features: A float Tensor of shape [batch_size, context_size,
        num_context_features].
      valid_context_size: A int32 Tensor of shape [batch_size].

    Returns:
      A float Tensor of shape [batch_size, max_num_proposals, 1, 1,
      output_dimension] containing output features after attention with
      context features.
    """
    _, context_size, _ = context_features.shape
    context_mask = compute_valid_mask(valid_context_size, context_size)

    # Average pool over axes 2 and 3 so that the pooled features have shape
    # [batch_size, max_num_proposals, channels].
    pooled_box_features = tf.reduce_mean(box_features, axis=[2, 3])

    # Query, key, value: all projected to the bottleneck size and
    # l2-normalized.
    queries = project_features(
        pooled_box_features, self._bottleneck_dimension, self._is_training,
        self._query_proj, normalize=True)
    keys = project_features(
        context_features, self._bottleneck_dimension, self._is_training,
        self._key_proj, normalize=True)
    values = project_features(
        context_features, self._bottleneck_dimension, self._is_training,
        self._val_proj, normalize=True)

    # Similarity of every box query with every context key.
    attention_logits = tf.matmul(queries, keys, transpose_b=True)
    attention_logits, values = filter_weight_value(
        attention_logits, values, context_mask)
    attention_weights = tf.nn.softmax(
        attention_logits / self._attention_temperature)

    attended_features = tf.matmul(attention_weights, values)
    output_features = project_features(
        attended_features, self._output_dimension, self._is_training,
        self._feature_proj, normalize=False)

    # Re-insert two singleton axes at positions 2 and 3.
    output_features = tf.expand_dims(output_features, axis=2)
    output_features = tf.expand_dims(output_features, axis=2)
    return output_features
def filter_weight_value(weights, values, valid_mask):
  """Masks out invalid context entries in attention weights and values.

  _NEGATIVE_PADDING_VALUE is added to the weights of invalid entries so that
  they do not contribute to the softmax, and the values of invalid entries are
  set to 0.

  Args:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].
    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True means
      valid and False means invalid.

  Returns:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].

  Raises:
    ValueError: If the shapes of the inputs don't match.
  """
  weights_batch, _, weights_context = weights.shape
  values_batch, values_context, _ = values.shape
  mask_batch, mask_context = valid_mask.shape

  batch_mismatch = weights_batch != values_batch or values_batch != mask_batch
  if batch_mismatch:
    raise ValueError('Please make sure the first dimension of the input'
                     ' tensors are the same.')

  if weights_context != values_context:
    raise ValueError('Please make sure the third dimension of weights matches'
                     ' the second dimension of values.')

  if weights_context != mask_context:
    raise ValueError('Please make sure the third dimension of the weights'
                     ' matches the second dimension of the valid_mask.')

  # Per-context penalty: _NEGATIVE_PADDING_VALUE for invalid entries, 0 for
  # valid ones. Shape [batch_size, context_size].
  invalid_penalty = (
      tf.cast(tf.math.logical_not(valid_mask), weights.dtype) *
      _NEGATIVE_PADDING_VALUE)
  # Broadcast the penalty over the input_size axis so the invalid weights
  # become very negative and drop out of the softmax.
  weights = weights + invalid_penalty[:, tf.newaxis, :]

  # Zero the value vectors of invalid entries.
  values = values * tf.cast(valid_mask[..., tf.newaxis], values.dtype)

  return weights, values
def project_features(features, bottleneck_dimension, is_training,
                     layer, normalize=True):
  """Projects features to another feature space.

  The features are flattened to [batch_size * features_size, num_features],
  passed through `layer`, and reshaped back to three dimensions. Leading
  dimensions that are statically known are kept static; an unknown batch
  dimension no longer breaks the final reshape.

  Args:
    features: A float Tensor of shape [batch_size, features_size,
      num_features].
    bottleneck_dimension: Size of the last dimension produced by `layer`.
    is_training: A boolean (affecting batch normalization), passed to `layer`
      as its second positional argument.
    layer: Contains a custom layer specific to the particular operation
      being performed (key, value, query, features).
    normalize: A boolean. If true, the output features will be l2
      normalized on the last dimension.

  Returns:
    A float Tensor of shape [batch, features_size, projection_dimension].
  """
  batch_size, features_size, num_features = features.shape
  # dimension_value maps an unknown dimension to None, whether the shape holds
  # plain ints or Dimension objects.
  batch_size = tf.compat.dimension_value(batch_size)
  features_size = tf.compat.dimension_value(features_size)

  # tf.reshape cannot take None, and only one dimension may be inferred with
  # -1. Prefer static values so later shape checks can still see them.
  if features_size is None:
    features_size = -1
    if batch_size is None:
      # Both leading dimensions are unknown: read the batch size at runtime.
      batch_size = tf.shape(features)[0]
  elif batch_size is None:
    batch_size = -1

  flat_features = tf.reshape(features, [-1, num_features])
  projected_features = layer(flat_features, is_training)
  projected_features = tf.reshape(
      projected_features, [batch_size, features_size, bottleneck_dimension])

  if normalize:
    projected_features = tf.keras.backend.l2_normalize(
        projected_features, axis=-1)

  return projected_features
def compute_valid_mask(num_valid_elements, num_elements):
  """Computes mask of valid entries within padded context feature.

  Entry [b, i] of the result is True when i < num_valid_elements[b]. The
  comparison relies on broadcasting, so the batch size does not have to be
  statically known.

  Args:
    num_valid_elements: A int32 Tensor of shape [batch_size].
    num_elements: An int32 scalar (Python int or Tensor) giving the padded
      number of elements.

  Returns:
    A boolean Tensor of the shape [batch_size, num_elements]. True means
    valid and False means invalid.
  """
  # Shape [1, num_elements]: candidate indices shared by every batch row.
  element_idxs = tf.range(num_elements, dtype=tf.int32)[tf.newaxis, ...]
  # Shape [batch_size, 1]: per-row count of valid elements.
  valid_counts = num_valid_elements[..., tf.newaxis]
  # Broadcasts to [batch_size, num_elements] without an explicit tf.tile,
  # which would need a static batch size.
  return tf.less(element_idxs, valid_counts)
research/object_detection/meta_architectures/context_rcnn_lib_tf2_test.py
0 → 100644
View file @
2ae9c3a6
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for context_rcnn_lib."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
unittest
from
absl.testing
import
parameterized
import
tensorflow.compat.v1
as
tf
from
object_detection.meta_architectures
import
context_rcnn_lib_tf2
as
context_rcnn_lib
from
object_detection.utils
import
test_case
from
object_detection.utils
import
tf_version
# Test-local padding constant, used below to build the expected masked weights.
# NOTE(review): presumably must match the library's _NEGATIVE_PADDING_VALUE.
_NEGATIVE_PADDING_VALUE = -100000
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ContextRcnnLibTest(parameterized.TestCase, test_case.TestCase):
  """Unit tests for the helpers and layers in context_rcnn_lib_tf2."""

  def test_compute_valid_mask(self):
    """The first num_valid entries of each mask row are valid."""
    padded_size = tf.constant(3, tf.int32)
    num_valid_elements = tf.constant((1, 2), tf.int32)
    expected_valid_mask = tf.constant([[1, 0, 0], [1, 1, 0]], tf.float32)

    valid_mask = context_rcnn_lib.compute_valid_mask(
        num_valid_elements, padded_size)

    self.assertAllEqual(valid_mask, expected_valid_mask)

  def test_filter_weight_value(self):
    """Invalid entries get padded weights and zeroed values."""
    weights = tf.ones((2, 3, 2), tf.float32) * 4
    values = tf.ones((2, 2, 4), tf.float32)

    kept = 4
    padded = _NEGATIVE_PADDING_VALUE + 4
    ones_row = [1, 1, 1, 1]
    zeros_row = [0, 0, 0, 0]

    # Each case: (valid mask, expected weights, expected values). The second
    # case changes the mask so the results differ.
    cases = [
        (
            [[True, True], [True, False]],
            [[[kept, kept], [kept, kept], [kept, kept]],
             [[kept, padded], [kept, padded], [kept, padded]]],
            [[ones_row, ones_row], [ones_row, zeros_row]],
        ),
        (
            [[True, True], [False, False]],
            [[[kept, kept], [kept, kept], [kept, kept]],
             [[padded, padded], [padded, padded], [padded, padded]]],
            [[ones_row, ones_row], [zeros_row, zeros_row]],
        ),
    ]

    for mask_rows, weight_rows, value_rows in cases:
      valid_mask = tf.constant(mask_rows, tf.bool)
      filtered_weights, filtered_values = context_rcnn_lib.filter_weight_value(
          weights, values, valid_mask)
      expected_weights = tf.constant(weight_rows)
      expected_values = tf.constant(value_rows)
      self.assertAllEqual(filtered_weights, expected_weights)
      self.assertAllEqual(filtered_values, expected_values)

  @parameterized.parameters((2, True, True), (2, False, True),
                            (10, True, False), (10, False, False))
  def test_project_features(self, projection_dimension, is_training, normalize):
    """Projected features keep the leading dims and get the new last dim."""
    features = tf.ones([2, 3, 4], tf.float32)
    projection_layer = context_rcnn_lib.ContextProjection(projection_dimension)

    projected_features = context_rcnn_lib.project_features(
        features,
        projection_dimension,
        is_training,
        projection_layer,
        normalize=normalize)

    # Makes sure the shape is correct.
    self.assertAllEqual(projected_features.shape, [2, 3, projection_dimension])

  @parameterized.parameters(
      (2, 10, 1),
      (3, 10, 2),
      (4, None, 3),
      (5, 20, 4),
      (7, None, 5),
  )
  def test_attention_block(self, bottleneck_dimension, output_dimension,
                           attention_temperature):
    """The attention block output has the expected 5-D shape."""
    input_features = tf.ones([2, 8, 3, 3, 3], tf.float32)
    context_features = tf.ones([2, 20, 10], tf.float32)
    attention_block = context_rcnn_lib.AttentionBlock(
        bottleneck_dimension,
        attention_temperature,
        output_dimension=output_dimension,
        is_training=False)
    valid_context_size = tf.random_uniform((2,),
                                           minval=0,
                                           maxval=10,
                                           dtype=tf.int32)

    output_features = attention_block(input_features, context_features,
                                      valid_context_size)

    # Makes sure the shape is correct. Without an explicit output dimension the
    # block falls back to the channel count of the input (3).
    expected_shape = [2, 8, 1, 1, (output_dimension or 3)]
    self.assertAllEqual(output_features.shape, expected_shape)
# Run the tests through TensorFlow's test runner when executed as a script.
if __name__ == '__main__':
  tf.test.main()
research/object_detection/meta_architectures/context_rcnn_meta_arch.py
View file @
2ae9c3a6
...
...
@@ -27,7 +27,9 @@ import functools
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.meta_architectures
import
context_rcnn_lib
from
object_detection.meta_architectures
import
context_rcnn_lib_tf2
from
object_detection.meta_architectures
import
faster_rcnn_meta_arch
from
object_detection.utils
import
tf_version
class
ContextRCNNMetaArch
(
faster_rcnn_meta_arch
.
FasterRCNNMetaArch
):
...
...
@@ -264,11 +266,17 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
return_raw_detections_during_predict
),
output_final_box_features
=
output_final_box_features
)
self
.
_context_feature_extract_fn
=
functools
.
partial
(
context_rcnn_lib
.
compute_box_context_attention
,
bottleneck_dimension
=
attention_bottleneck_dimension
,
attention_temperature
=
attention_temperature
,
is_training
=
is_training
)
if
tf_version
.
is_tf1
():
self
.
_context_feature_extract_fn
=
functools
.
partial
(
context_rcnn_lib
.
compute_box_context_attention
,
bottleneck_dimension
=
attention_bottleneck_dimension
,
attention_temperature
=
attention_temperature
,
is_training
=
is_training
)
else
:
self
.
_context_feature_extract_fn
=
context_rcnn_lib_tf2
.
AttentionBlock
(
bottleneck_dimension
=
attention_bottleneck_dimension
,
attention_temperature
=
attention_temperature
,
is_training
=
is_training
)
@
staticmethod
def
get_side_inputs
(
features
):
...
...
@@ -323,6 +331,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
Returns:
A float32 Tensor with shape [K, new_height, new_width, depth].
"""
box_features
=
self
.
_crop_and_resize_fn
(
[
features_to_crop
],
proposal_boxes_normalized
,
None
,
[
self
.
_initial_crop_size
,
self
.
_initial_crop_size
])
...
...
research/object_detection/meta_architectures/context_rcnn_meta_arch_
tf1_
test.py
→
research/object_detection/meta_architectures/context_rcnn_meta_arch_test.py
View file @
2ae9c3a6
...
...
@@ -109,7 +109,6 @@ class FakeFasterRCNNKerasFeatureExtractor(
])
@
unittest
.
skipIf
(
tf_version
.
is_tf2
(),
'Skipping TF1.X only test.'
)
class
ContextRCNNMetaArchTest
(
test_case
.
TestCase
,
parameterized
.
TestCase
):
def
_get_model
(
self
,
box_predictor
,
**
common_kwargs
):
...
...
@@ -440,15 +439,16 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
masks_are_class_agnostic
=
masks_are_class_agnostic
,
share_box_across_classes
=
share_box_across_classes
),
**
common_kwargs
)
@
unittest
.
skipIf
(
tf_version
.
is_tf2
(),
'Skipping TF1.X only test.'
)
@
mock
.
patch
.
object
(
context_rcnn_meta_arch
,
'context_rcnn_lib'
)
def
test_prediction_mock
(
self
,
mock_context_rcnn_lib
):
"""Mocks the context_rcnn_lib module to test the prediction.
def
test_prediction_mock
_tf1
(
self
,
mock_context_rcnn_lib
_v1
):
"""Mocks the context_rcnn_lib
_v1
module to test the prediction.
Using mock object so that we can ensure compute_box_context_attention is
called in side the prediction function.
Args:
mock_context_rcnn_lib: mock module for the context_rcnn_lib.
mock_context_rcnn_lib
_v1
: mock module for the context_rcnn_lib
_v1
.
"""
model
=
self
.
_build_model
(
is_training
=
False
,
...
...
@@ -457,7 +457,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
num_classes
=
42
)
mock_tensor
=
tf
.
ones
([
2
,
8
,
3
,
3
,
3
],
tf
.
float32
)
mock_context_rcnn_lib
.
compute_box_context_attention
.
return_value
=
mock_tensor
mock_context_rcnn_lib
_v1
.
compute_box_context_attention
.
return_value
=
mock_tensor
inputs_shape
=
(
2
,
20
,
20
,
3
)
inputs
=
tf
.
cast
(
tf
.
random_uniform
(
inputs_shape
,
minval
=
0
,
maxval
=
255
,
dtype
=
tf
.
int32
),
...
...
@@ -479,7 +479,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
side_inputs
=
model
.
get_side_inputs
(
features
)
_
=
model
.
predict
(
preprocessed_inputs
,
true_image_shapes
,
**
side_inputs
)
mock_context_rcnn_lib
.
compute_box_context_attention
.
assert_called_once
()
mock_context_rcnn_lib
_v1
.
compute_box_context_attention
.
assert_called_once
()
@
parameterized
.
named_parameters
(
{
'testcase_name'
:
'static_shapes'
,
'static_shapes'
:
True
},
...
...
@@ -518,7 +518,6 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
}
side_inputs
=
model
.
get_side_inputs
(
features
)
prediction_dict
=
model
.
predict
(
preprocessed_inputs
,
true_image_shapes
,
**
side_inputs
)
return
(
prediction_dict
[
'rpn_box_predictor_features'
],
...
...
research/object_detection/model_lib_v2.py
View file @
2ae9c3a6
...
...
@@ -117,7 +117,8 @@ def _compute_losses_and_predictions_dicts(
prediction_dict
=
model
.
predict
(
preprocessed_images
,
features
[
fields
.
InputDataFields
.
true_image_shape
])
features
[
fields
.
InputDataFields
.
true_image_shape
],
**
model
.
get_side_inputs
(
features
))
prediction_dict
=
ops
.
bfloat16_to_float32_nested
(
prediction_dict
)
losses_dict
=
model
.
loss
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment