Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
cbd607ab
Commit
cbd607ab
authored
Jul 16, 2020
by
Kaushik Shivakumar
Browse files
make significant fixes suggested
parent
e7667f6f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
36 additions
and
265 deletions
+36
-265
research/object_detection/meta_architectures/context_rcnn_lib_v1.py
...bject_detection/meta_architectures/context_rcnn_lib_v1.py
+0
-225
research/object_detection/meta_architectures/context_rcnn_lib_v2.py
...bject_detection/meta_architectures/context_rcnn_lib_v2.py
+25
-26
research/object_detection/meta_architectures/context_rcnn_meta_arch.py
...ct_detection/meta_architectures/context_rcnn_meta_arch.py
+11
-14
No files found.
research/object_detection/meta_architectures/context_rcnn_lib_v1.py
deleted
100644 → 0
View file @
e7667f6f
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Library functions for ContextRCNN."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow.compat.v1
as
tf
import
tf_slim
as
slim
# Large negative offset that filter_weight_value adds to the attention weights
# of invalid (padded) context entries, so that those entries contribute
# (almost) nothing once the weights are passed through a softmax.
_NEGATIVE_PADDING_VALUE = -100000
def filter_weight_value(weights, values, valid_mask):
  """Masks out invalid context entries in attention weights and values.

  Invalid positions of `weights` get _NEGATIVE_PADDING_VALUE added to them so
  that a subsequent softmax assigns them (almost) no mass, and invalid rows of
  `values` are multiplied by zero.

  Args:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].
    valid_mask: A boolean Tensor of shape [batch_size, context_size]. True
      marks a valid context entry, False an invalid one.

  Returns:
    weights: A float Tensor of shape [batch_size, input_size, context_size].
    values: A float Tensor of shape [batch_size, context_size,
      projected_dimension].

  Raises:
    ValueError: If the shapes of the inputs do not match each other.
  """
  weight_batch, _, weight_context = weights.shape
  value_batch, value_context, _ = values.shape
  mask_batch, mask_context = valid_mask.shape

  # The comparisons are intentionally written with `!=` (not a negated `==`):
  # shape entries may be dimension objects rather than plain ints.
  if weight_batch != value_batch or value_batch != mask_batch:
    raise ValueError("Please make sure the first dimension of the input"
                     " tensors are the same.")
  if weight_context != value_context:
    raise ValueError("Please make sure the third dimension of weights matches"
                     " the second dimension of values.")
  if weight_context != mask_context:
    raise ValueError("Please make sure the third dimension of the weights"
                     " matches the second dimension of the valid_mask.")

  # [batch_size, context_size] -> [batch_size, context_size, 1].
  valid_mask = valid_mask[..., tf.newaxis]

  # 1.0 where the entry is invalid, 0.0 elsewhere; scaled to a very negative
  # number and transposed to [batch_size, 1, context_size] so it broadcasts
  # over the input_size axis of the weights.
  invalid_indicator = tf.cast(tf.math.logical_not(valid_mask), weights.dtype)
  padding_offset = tf.transpose(
      invalid_indicator * _NEGATIVE_PADDING_VALUE, perm=[0, 2, 1])
  weights += padding_offset

  # Zero out the value vectors that belong to invalid entries.
  keep_indicator = tf.cast(valid_mask, values.dtype)
  values *= keep_indicator

  return weights, values
def compute_valid_mask(num_valid_elements, num_elements):
  """Computes mask of valid entries within padded context feature.

  Entry [b, i] of the result is True exactly when i < num_valid_elements[b].

  Args:
    num_valid_elements: An int32 Tensor of shape [batch_size].
    num_elements: An int32 scalar giving the padded number of elements; it is
      passed to tf.range.

  Returns:
    A boolean Tensor of the shape [batch_size, num_elements]. True means
    valid and False means invalid.
  """
  element_idxs = tf.range(num_elements, dtype=tf.int32)
  # Compare [1, num_elements] against [batch_size, 1] and let tf.less
  # broadcast. This yields the same mask as explicitly tiling the indices per
  # batch row, but does not need the batch size to be statically known.
  return tf.less(element_idxs[tf.newaxis, ...],
                 num_valid_elements[..., tf.newaxis])
def project_features(features, projection_dimension, is_training, normalize):
  """Projects features into another feature space.

  The features are flattened to 2-D, passed through one fully connected layer
  (ReLU6 activation, batch normalization), and reshaped back to 3-D.

  Args:
    features: A float Tensor of shape [batch_size, features_size,
      num_features].
    projection_dimension: The size of the last dimension of the output.
    is_training: A boolean forwarded to batch normalization.
    normalize: A boolean. If true, the output features are l2 normalized on
      the last dimension. It is evaluated with a plain `if`.

  Returns:
    A float Tensor of shape [batch, features_size, projection_dimension].
  """
  batch_size, _, num_features = features.shape

  # TODO(guanhangwu) Figure out a better way of specifying the batch norm
  # params.
  norm_kwargs = {
      "is_training": is_training,
      "decay": 0.97,
      "epsilon": 0.001,
      "center": True,
      "scale": True
  }

  # Collapse the batch and features_size axes so the dense layer sees a 2-D
  # input of shape [batch_size * features_size, num_features].
  flat_features = tf.reshape(features, [-1, num_features])
  flat_projection = slim.fully_connected(
      flat_features,
      num_outputs=projection_dimension,
      activation_fn=tf.nn.relu6,
      normalizer_fn=slim.batch_norm,
      normalizer_params=norm_kwargs)

  # Restore the leading batch axis.
  projection = tf.reshape(flat_projection,
                          [batch_size, -1, projection_dimension])
  if not normalize:
    return projection
  return tf.math.l2_normalize(projection, axis=-1)
def attention_block(input_features, context_features, bottleneck_dimension,
                    output_dimension, attention_temperature, valid_mask,
                    is_training):
  """Generic attention block.

  Queries are projected from `input_features`; keys and values are both
  projected from `context_features` (three separate project_features calls,
  all l2 normalized). The query/key products are masked with `valid_mask`,
  divided by the temperature, passed through a softmax, and used to take a
  weighted sum of the values, which is finally projected to
  `output_dimension` without normalization.

  Args:
    input_features: A float Tensor of shape [batch_size, input_size,
      num_input_features].
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
      for intermediate projections.
    output_dimension: A int32 Tensor representing the last dimension of the
      output feature.
    attention_temperature: A float Tensor. It controls the temperature of the
      softmax for weights calculation. The formula for calculation as follows:
        weights = exp(weights / temperature) / sum(exp(weights / temperature))
    valid_mask: A boolean Tensor of shape [batch_size, context_size].
    is_training: A boolean Tensor (affecting batch normalization).

  Returns:
    A float Tensor of shape [batch_size, input_size, output_dimension].
  """
  # NOTE(review): the extent of this variable scope is reconstructed; the
  # flattened source does not show where the `with` body ends. Confirm against
  # the upstream file, since it decides the names of the projection variables.
  with tf.variable_scope("AttentionBlock"):
    queries = project_features(
        input_features, bottleneck_dimension, is_training, normalize=True)
    keys = project_features(
        context_features, bottleneck_dimension, is_training, normalize=True)
    values = project_features(
        context_features, bottleneck_dimension, is_training, normalize=True)

  # Similarity of every query with every key:
  # [batch_size, input_size, context_size].
  weights = tf.matmul(queries, keys, transpose_b=True)

  # Push invalid context entries towards zero softmax mass and zero out their
  # value vectors.
  weights, values = filter_weight_value(weights, values, valid_mask)

  weights = tf.nn.softmax(weights / attention_temperature)

  # Attention-weighted sum of the values, then the output projection.
  features = tf.matmul(weights, values)
  output_features = project_features(
      features, output_dimension, is_training, normalize=False)
  return output_features
def compute_box_context_attention(box_features, context_features,
                                  valid_context_size, bottleneck_dimension,
                                  attention_temperature, is_training):
  """Computes the attention feature from the context for a batch of boxes.

  Args:
    box_features: A float Tensor of shape [batch_size, max_num_proposals,
      height, width, channels]. It is pooled features from first stage
      proposals.
    context_features: A float Tensor of shape [batch_size, context_size,
      num_context_features].
    valid_context_size: A int32 Tensor of shape [batch_size].
    bottleneck_dimension: A int32 Tensor representing the bottleneck dimension
      for intermediate projections.
    attention_temperature: A float Tensor. It controls the temperature of the
      softmax for weights calculation. The formula for calculation as follows:
        weights = exp(weights / temperature) / sum(exp(weights / temperature))
    is_training: A boolean Tensor (affecting batch normalization).

  Returns:
    A float Tensor of shape [batch_size, max_num_proposals, 1, 1, channels].
  """
  # The attention output is projected back to the channel count of the boxes.
  channels = box_features.shape[-1]

  # Mark which of the padded context entries are real for each batch element.
  _, context_size, _ = context_features.shape
  valid_mask = compute_valid_mask(valid_context_size, context_size)

  # Spatial average pooling: [batch_size, max_num_proposals, height, width,
  # channels] -> [batch_size, max_num_proposals, channels].
  pooled_box_features = tf.reduce_mean(box_features, [2, 3])

  attended_features = attention_block(
      pooled_box_features, context_features, bottleneck_dimension,
      channels.value, attention_temperature, valid_mask, is_training)

  # Re-insert singleton height and width axes so the result lines up with the
  # original box feature map.
  return attended_features[:, :, tf.newaxis, tf.newaxis, :]
\ No newline at end of file
research/object_detection/meta_architectures/context_rcnn_lib_v2.py
View file @
cbd607ab
...
@@ -53,36 +53,35 @@ class AttentionBlock(tf.keras.layers.Layer):
...
@@ -53,36 +53,35 @@ class AttentionBlock(tf.keras.layers.Layer):
"""Custom layer to perform all attention."""
"""Custom layer to perform all attention."""
def
__init__
(
self
,
bottleneck_dimension
,
attention_temperature
,
def
__init__
(
self
,
bottleneck_dimension
,
attention_temperature
,
freeze_batchnorm
,
output_dimension
=
None
,
**
kwargs
):
freeze_batchnorm
,
output_dimension
=
None
,
**
kwargs
):
self
.
key_proj
=
ContextProjection
(
bottleneck_dimension
,
freeze_batchnorm
)
self
.
_key_proj
=
ContextProjection
(
bottleneck_dimension
,
freeze_batchnorm
)
self
.
val_proj
=
ContextProjection
(
bottleneck_dimension
,
freeze_batchnorm
)
self
.
_val_proj
=
ContextProjection
(
bottleneck_dimension
,
freeze_batchnorm
)
self
.
query_proj
=
ContextProjection
(
bottleneck_dimension
,
freeze_batchnorm
)
self
.
_query_proj
=
ContextProjection
(
bottleneck_dimension
,
freeze_batchnorm
)
self
.
feature_proj
=
None
self
.
_feature_proj
=
None
self
.
attention_temperature
=
attention_temperature
self
.
_attention_temperature
=
attention_temperature
self
.
freeze_batchnorm
=
freeze_batchnorm
self
.
_freeze_batchnorm
=
freeze_batchnorm
self
.
bottleneck_dimension
=
bottleneck_dimension
self
.
_bottleneck_dimension
=
bottleneck_dimension
self
.
output_dimension
=
output_dimension
self
.
_output_dimension
=
output_dimension
self
.
_is_training
=
is_training
super
(
AttentionBlock
,
self
).
__init__
(
**
kwargs
)
super
(
AttentionBlock
,
self
).
__init__
(
**
kwargs
)
def
set_output_dimension
(
self
,
output_dim
):
def
set_output_dimension
(
self
,
output_dim
):
self
.
output_dimension
=
output_dim
self
.
_
output_dimension
=
output_dim
def
build
(
self
,
input_shapes
):
def
build
(
self
,
input_shapes
):
pass
pass
def
call
(
self
,
input_features
,
is_training
,
valid_context_size
):
def
call
(
self
,
input_features
,
context_features
,
valid_context_size
):
"""Handles a call by performing attention."""
"""Handles a call by performing attention."""
input_features
,
context_features
=
input_features
_
,
context_size
,
_
=
context_features
.
shape
_
,
context_size
,
_
=
context_features
.
shape
valid_mask
=
compute_valid_mask
(
valid_context_size
,
context_size
)
valid_mask
=
compute_valid_mask
(
valid_context_size
,
context_size
)
channels
=
input_features
.
shape
[
-
1
]
channels
=
input_features
.
shape
[
-
1
]
#Build the feature projection layer
#Build the feature projection layer
if
(
not
self
.
output_dimension
):
if
(
not
self
.
_
output_dimension
):
self
.
output_dimension
=
channels
self
.
_
output_dimension
=
channels
if
(
not
self
.
feature_proj
):
if
(
not
self
.
_
feature_proj
):
self
.
feature_proj
=
ContextProjection
(
self
.
output_dimension
,
self
.
_
feature_proj
=
ContextProjection
(
self
.
_
output_dimension
,
self
.
freeze_batchnorm
)
self
.
_
freeze_batchnorm
)
# Average pools over height and width dimension so that the shape of
# Average pools over height and width dimension so that the shape of
# box_features becomes [batch_size, max_num_proposals, channels].
# box_features becomes [batch_size, max_num_proposals, channels].
...
@@ -90,25 +89,25 @@ class AttentionBlock(tf.keras.layers.Layer):
...
@@ -90,25 +89,25 @@ class AttentionBlock(tf.keras.layers.Layer):
with
tf
.
variable_scope
(
"AttentionBlock"
):
with
tf
.
variable_scope
(
"AttentionBlock"
):
queries
=
project_features
(
queries
=
project_features
(
input_features
,
self
.
bottleneck_dimension
,
is_training
,
input_features
,
self
.
_
bottleneck_dimension
,
self
.
_
is_training
,
self
.
query_proj
,
normalize
=
True
)
self
.
_
query_proj
,
normalize
=
True
)
keys
=
project_features
(
keys
=
project_features
(
context_features
,
self
.
bottleneck_dimension
,
is_training
,
context_features
,
self
.
_
bottleneck_dimension
,
self
.
_
is_training
,
self
.
key_proj
,
normalize
=
True
)
self
.
_
key_proj
,
normalize
=
True
)
values
=
project_features
(
values
=
project_features
(
context_features
,
self
.
bottleneck_dimension
,
is_training
,
context_features
,
self
.
_
bottleneck_dimension
,
self
.
_
is_training
,
self
.
val_proj
,
normalize
=
True
)
self
.
_
val_proj
,
normalize
=
True
)
weights
=
tf
.
matmul
(
queries
,
keys
,
transpose_b
=
True
)
weights
=
tf
.
matmul
(
queries
,
keys
,
transpose_b
=
True
)
weights
,
values
=
filter_weight_value
(
weights
,
values
,
valid_mask
)
weights
,
values
=
filter_weight_value
(
weights
,
values
,
valid_mask
)
weights
=
tf
.
nn
.
softmax
(
weights
/
self
.
attention_temperature
)
weights
=
tf
.
nn
.
softmax
(
weights
/
self
.
_
attention_temperature
)
features
=
tf
.
matmul
(
weights
,
values
)
features
=
tf
.
matmul
(
weights
,
values
)
output_features
=
project_features
(
output_features
=
project_features
(
features
,
self
.
output_dimension
,
is_training
,
features
,
self
.
_
output_dimension
,
self
.
_
is_training
,
self
.
feature_proj
,
normalize
=
False
)
self
.
_
feature_proj
,
normalize
=
False
)
output_features
=
output_features
[:,
:,
tf
.
newaxis
,
tf
.
newaxis
,
:]
output_features
=
output_features
[:,
:,
tf
.
newaxis
,
tf
.
newaxis
,
:]
...
...
research/object_detection/meta_architectures/context_rcnn_meta_arch.py
View file @
cbd607ab
...
@@ -26,7 +26,8 @@ from __future__ import print_function
...
@@ -26,7 +26,8 @@ from __future__ import print_function
import
functools
import
functools
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.meta_architectures
import
context_rcnn_lib
,
context_rcnn_lib_v2
from
object_detection.meta_architectures
import
context_rcnn_lib
from
object_detection.meta_architectures
import
context_rcnn_lib_v2
from
object_detection.meta_architectures
import
faster_rcnn_meta_arch
from
object_detection.meta_architectures
import
faster_rcnn_meta_arch
from
object_detection.utils
import
tf_version
from
object_detection.utils
import
tf_version
...
@@ -271,10 +272,11 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
...
@@ -271,10 +272,11 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
attention_temperature
=
attention_temperature
,
attention_temperature
=
attention_temperature
,
is_training
=
is_training
)
is_training
=
is_training
)
else
:
else
:
self
.
_attention_block
=
context_rcnn_lib_v2
.
AttentionBlock
(
self
.
_context_feature_extract_fn
=
context_rcnn_lib_v2
.
AttentionBlock
(
attention_bottleneck_dimension
,
attention_temperature
,
bottleneck_dimension
=
attention_bottleneck_dimension
,
freeze_batchnorm
)
attention_temperature
=
attention_temperature
,
self
.
_is_training
=
is_training
freeze_batchnorm
=
freeze_batchnorm
,
is_training
=
is_training
)
@
staticmethod
@
staticmethod
def
get_side_inputs
(
features
):
def
get_side_inputs
(
features
):
...
@@ -335,15 +337,10 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
...
@@ -335,15 +337,10 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
features_to_crop
,
proposal_boxes_normalized
,
features_to_crop
,
proposal_boxes_normalized
,
[
self
.
_initial_crop_size
,
self
.
_initial_crop_size
])
[
self
.
_initial_crop_size
,
self
.
_initial_crop_size
])
if
tf_version
.
is_tf1
():
attention_features
=
self
.
_context_feature_extract_fn
(
attention_features
=
self
.
_context_feature_extract_fn
(
box_features
=
box_features
,
box_features
=
box_features
,
context_features
=
context_features
,
context_features
=
context_features
,
valid_context_size
=
valid_context_size
)
valid_context_size
=
valid_context_size
)
else
:
attention_features
=
self
.
_attention_block
(
[
box_features
,
context_features
],
self
.
_is_training
,
valid_context_size
)
# Adds box features with attention features.
# Adds box features with attention features.
box_features
+=
attention_features
box_features
+=
attention_features
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment