Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
27b4acd4
Commit
27b4acd4
authored
Sep 25, 2018
by
Aman Gupta
Browse files
Merge remote-tracking branch 'upstream/master'
parents
5133522f
d4e1f97f
Changes
240
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1817 additions
and
19 deletions
+1817
-19
research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor.py
...etection/models/ssd_mobilenet_v2_fpn_feature_extractor.py
+183
-0
research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_test.py
...ion/models/ssd_mobilenet_v2_fpn_feature_extractor_test.py
+206
-0
research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py
...ection/models/ssd_mobilenet_v2_keras_feature_extractor.py
+160
-0
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py
...t_detection/models/ssd_resnet_v1_fpn_feature_extractor.py
+14
-2
research/object_detection/object_detection_tutorial.ipynb
research/object_detection/object_detection_tutorial.ipynb
+3
-7
research/object_detection/predictors/convolutional_box_predictor.py
...bject_detection/predictors/convolutional_box_predictor.py
+12
-3
research/object_detection/predictors/convolutional_box_predictor_test.py
..._detection/predictors/convolutional_box_predictor_test.py
+107
-0
research/object_detection/predictors/convolutional_keras_box_predictor.py
...detection/predictors/convolutional_keras_box_predictor.py
+188
-0
research/object_detection/predictors/convolutional_keras_box_predictor_test.py
...tion/predictors/convolutional_keras_box_predictor_test.py
+195
-0
research/object_detection/predictors/heads/box_head.py
research/object_detection/predictors/heads/box_head.py
+18
-4
research/object_detection/predictors/heads/class_head.py
research/object_detection/predictors/heads/class_head.py
+17
-2
research/object_detection/predictors/heads/head.py
research/object_detection/predictors/heads/head.py
+22
-0
research/object_detection/predictors/heads/keras_box_head.py
research/object_detection/predictors/heads/keras_box_head.py
+124
-0
research/object_detection/predictors/heads/keras_box_head_test.py
.../object_detection/predictors/heads/keras_box_head_test.py
+62
-0
research/object_detection/predictors/heads/keras_class_head.py
...rch/object_detection/predictors/heads/keras_class_head.py
+148
-0
research/object_detection/predictors/heads/keras_class_head_test.py
...bject_detection/predictors/heads/keras_class_head_test.py
+65
-0
research/object_detection/predictors/heads/keras_mask_head.py
...arch/object_detection/predictors/heads/keras_mask_head.py
+158
-0
research/object_detection/predictors/heads/keras_mask_head_test.py
...object_detection/predictors/heads/keras_mask_head_test.py
+90
-0
research/object_detection/predictors/heads/mask_head.py
research/object_detection/predictors/heads/mask_head.py
+1
-0
research/object_detection/protos/box_predictor.proto
research/object_detection/protos/box_predictor.proto
+44
-1
No files found.
research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor.py
0 → 100644
View file @
27b4acd4
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD MobilenetV2 FPN Feature Extractor."""
import
copy
import
functools
import
tensorflow
as
tf
from
object_detection.meta_architectures
import
ssd_meta_arch
from
object_detection.models
import
feature_map_generators
from
object_detection.utils
import
context_manager
from
object_detection.utils
import
ops
from
object_detection.utils
import
shape_utils
from
nets.mobilenet
import
mobilenet
from
nets.mobilenet
import
mobilenet_v2
slim
=
tf
.
contrib
.
slim
# A modified config of mobilenet v2 that makes it more detection friendly.
def _create_modified_mobilenet_config():
  """Builds a MobileNet v2 conv-def spec tuned for detection.

  The stock `mobilenet_v2.V2_DEF` ends in a wide 1x1 convolution; for the FPN
  feature extractor we replace that last op with a 1x1 conv producing 256
  channels so the final endpoint depth matches the FPN layers.

  Returns:
    A conv-defs dict with the same structure as `mobilenet_v2.V2_DEF`, whose
    last 'spec' entry has been replaced.
  """
  # Deep-copy so that mutating conv_defs['spec'] below cannot leak into the
  # shared module-level mobilenet_v2.V2_DEF. A shallow copy.copy() would alias
  # the inner 'spec' list and silently corrupt the canonical definition for
  # every other user of V2_DEF in the process.
  conv_defs = copy.deepcopy(mobilenet_v2.V2_DEF)
  conv_defs['spec'][-1] = mobilenet.op(
      slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=256)
  return conv_defs


# Built once at import time; used by extract_features when use_depthwise=True.
_CONV_DEFS = _create_modified_mobilenet_config()
class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
  """SSD Feature Extractor using MobilenetV2 FPN features."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams_fn,
               fpn_min_level=3,
               fpn_max_level=7,
               additional_layer_depth=256,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False,
               override_base_feature_extractor_hyperparams=False):
    """SSD FPN feature extractor based on Mobilenet v2 architecture.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
        and separable_conv2d ops in the layers that are added on top of the
        base feature extractor.
      fpn_min_level: the highest resolution feature map to use in FPN. The
        valid values are {2, 3, 4, 5} which map to MobileNet v2 layers
        {layer_4, layer_7, layer_14, layer_19}, respectively.
      fpn_max_level: the smallest resolution feature map to construct or use in
        FPN. FPN constructions uses features maps starting from fpn_min_level
        upto the fpn_max_level. In the case that there are not enough feature
        maps in the backbone network, additional feature maps are created by
        applying stride 2 convolutions until we get the desired number of fpn
        levels.
      additional_layer_depth: additional feature map layer channel depth.
      reuse_weights: whether to reuse variables. Default is None.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
      override_base_feature_extractor_hyperparams: Whether to override
        hyperparameters of the base feature extractor with the one from
        `conv_hyperparams_fn`.
    """
    super(SSDMobileNetV2FpnFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams_fn=conv_hyperparams_fn,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
    # FPN-specific knobs; everything else is stored by the base class.
    self._fpn_min_level = fpn_min_level
    self._fpn_max_level = fpn_max_level
    self._additional_layer_depth = additional_layer_depth

  def preprocess(self, resized_inputs):
    """SSD preprocessing.

    Maps pixel values to the range [-1, 1].

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    # Linear rescale: assumes inputs are in [0, 255] — TODO confirm caller.
    return (2.0 / 255.0) * resized_inputs - 1.0

  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Runs the MobileNet v2 base network, builds a top-down FPN over the
    {layer_4, layer_7, layer_14, layer_19} endpoints, then appends extra
    stride-2 "coarse" feature maps until fpn_max_level is reached.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    with tf.variable_scope('MobilenetV2',
                           reuse=self._reuse_weights) as scope:
      # is_training=None leaves batch-norm training mode to be set elsewhere;
      # bn_decay matches the value used by the MobileNet v2 release.
      with slim.arg_scope(
          mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
          slim.arg_scope([mobilenet.depth_multiplier],
                         min_depth=self._min_depth):
        # Optionally override the backbone's hyperparams with the detection
        # conv hyperparams; otherwise use a no-op context manager.
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v2.mobilenet_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='layer_19',
              depth_multiplier=self._depth_multiplier,
              # The modified conv-defs only apply in the depthwise variant.
              conv_defs=_CONV_DEFS if self._use_depthwise else None,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      # Channel depth after applying the multiplier, floored at min_depth.
      depth_fn = lambda d: max(int(d * self._depth_multiplier),
                               self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('fpn', reuse=self._reuse_weights):
          # Backbone endpoints for FPN levels 2..5, in resolution order.
          feature_blocks = ['layer_4', 'layer_7', 'layer_14', 'layer_19']
          # The backbone only provides levels up to 5; higher levels are
          # synthesized below with stride-2 convs.
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            # level - 2 maps FPN level to an index into feature_blocks.
            feature_block_list.append(feature_blocks[level - 2])
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(self._additional_layer_depth),
              use_depthwise=self._use_depthwise)
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(
                fpn_features['top_down_{}'.format(feature_blocks[level - 2])])
          last_feature_map = fpn_features['top_down_{}'.format(
              feature_blocks[base_fpn_max_level - 2])]
          # Construct coarse features
          for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
            if self._use_depthwise:
              conv_op = functools.partial(slim.separable_conv2d,
                                          depth_multiplier=1)
            else:
              conv_op = slim.conv2d
            last_feature_map = conv_op(
                last_feature_map,
                num_outputs=depth_fn(self._additional_layer_depth),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                # Continues the backbone's layer numbering (layer_19 is last).
                scope='bottom_up_Conv2d_{}'.format(
                    i - base_fpn_max_level + 19))
            feature_maps.append(last_feature_map)
    return feature_maps
research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor_test.py
0 → 100644
View file @
27b4acd4
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for ssd_mobilenet_v2_fpn_feature_extractor."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.models
import
ssd_feature_extractor_test
from
object_detection.models
import
ssd_mobilenet_v2_fpn_feature_extractor
slim
=
tf
.
contrib
.
slim
class SsdMobilenetV2FpnFeatureExtractorTest(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
  """Shape/behavior tests for SSDMobileNetV2FpnFeatureExtractor."""

  def _create_feature_extractor(self,
                                depth_multiplier,
                                pad_to_multiple,
                                is_training=True,
                                use_explicit_padding=False):
    """Constructs a new feature extractor.

    Args:
      depth_multiplier: float depth multiplier for feature extractor
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      is_training: whether the network is in training mode.
      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
        inputs so that the output dimensions are the same as if 'SAME' padding
        were used.

    Returns:
      an ssd_meta_arch.SSDFeatureExtractor object.
    """
    min_depth = 32
    return (ssd_mobilenet_v2_fpn_feature_extractor.
            SSDMobileNetV2FpnFeatureExtractor(
                is_training,
                depth_multiplier,
                min_depth,
                pad_to_multiple,
                self.conv_hyperparams_fn,
                use_explicit_padding=use_explicit_padding))

  def test_extract_features_returns_correct_shapes_256(self):
    # 256x256 input -> five FPN levels from stride 8 down to stride 128.
    image_height = 256
    image_width = 256
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
                                  (2, 8, 8, 256), (2, 4, 4, 256),
                                  (2, 2, 2, 256)]
    # Exercise both implicit (SAME) and explicit padding paths.
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=False)
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=True)

  def test_extract_features_returns_correct_shapes_384(self):
    # NOTE(review): despite the name, this test uses a 320x320 input —
    # presumably a stale method name; confirm before renaming.
    image_height = 320
    image_width = 320
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 40, 40, 256), (2, 20, 20, 256),
                                  (2, 10, 10, 256), (2, 5, 5, 256),
                                  (2, 3, 3, 256)]
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=False)
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=True)

  def test_extract_features_with_dynamic_image_shape(self):
    # Same expectations as the static 256x256 case, but fed through
    # placeholders with dynamic spatial dimensions.
    image_height = 256
    image_width = 256
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 32, 32, 256), (2, 16, 16, 256),
                                  (2, 8, 8, 256), (2, 4, 4, 256),
                                  (2, 2, 2, 256)]
    self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=False)
    self.check_extract_features_returns_correct_shapes_with_dynamic_inputs(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=True)

  def test_extract_features_returns_correct_shapes_with_pad_to_multiple(self):
    # 299x299 padded up to a multiple of 32 (i.e. 320x320) before extraction.
    image_height = 299
    image_width = 299
    depth_multiplier = 1.0
    pad_to_multiple = 32
    expected_feature_map_shape = [(2, 40, 40, 256), (2, 20, 20, 256),
                                  (2, 10, 10, 256), (2, 5, 5, 256),
                                  (2, 3, 3, 256)]
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=False)
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=True)

  def test_extract_features_returns_correct_shapes_enforcing_min_depth(self):
    # A vanishingly small depth multiplier forces every layer down to the
    # extractor's min_depth (32 channels).
    image_height = 256
    image_width = 256
    depth_multiplier = 0.5 ** 12
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 32, 32, 32), (2, 16, 16, 32),
                                  (2, 8, 8, 32), (2, 4, 4, 32),
                                  (2, 2, 2, 32)]
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=False)
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape, use_explicit_padding=True)

  def test_extract_features_raises_error_with_invalid_image_size(self):
    # 32x32 is below the extractor's 33-pixel minimum image dimension.
    image_height = 32
    image_width = 32
    depth_multiplier = 1.0
    pad_to_multiple = 1
    self.check_extract_features_raises_error_with_invalid_image_size(
        image_height, image_width, depth_multiplier, pad_to_multiple)

  def test_preprocess_returns_correct_value_range(self):
    # preprocess should map [0, 1) random inputs into [-1, 1].
    image_height = 256
    image_width = 256
    depth_multiplier = 1
    pad_to_multiple = 1
    test_image = np.random.rand(2, image_height, image_width, 3)
    feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                       pad_to_multiple)
    preprocessed_image = feature_extractor.preprocess(test_image)
    self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))

  def test_variables_only_created_in_scope(self):
    depth_multiplier = 1
    pad_to_multiple = 1
    scope_name = 'MobilenetV2'
    self.check_feature_extractor_variables_under_scope(
        depth_multiplier, pad_to_multiple, scope_name)

  def test_fused_batchnorm(self):
    # Extraction should produce fused batch-norm ops in the default graph.
    image_height = 256
    image_width = 256
    depth_multiplier = 1
    pad_to_multiple = 1
    image_placeholder = tf.placeholder(tf.float32,
                                       [1, image_height, image_width, 3])
    feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                       pad_to_multiple)
    preprocessed_image = feature_extractor.preprocess(image_placeholder)
    _ = feature_extractor.extract_features(preprocessed_image)
    self.assertTrue(
        any(op.type == 'FusedBatchNorm'
            for op in tf.get_default_graph().get_operations()))

  def test_get_expected_feature_map_variable_names(self):
    # Builds the graph and checks that the expected backbone and FPN
    # variables are all present among the global variables.
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_maps_variables = set([
        # Mobilenet V2 feature maps
        'MobilenetV2/expanded_conv_4/depthwise/depthwise_weights',
        'MobilenetV2/expanded_conv_7/depthwise/depthwise_weights',
        'MobilenetV2/expanded_conv_14/depthwise/depthwise_weights',
        'MobilenetV2/Conv_1/weights',
        # FPN layers
        'MobilenetV2/fpn/bottom_up_Conv2d_20/weights',
        'MobilenetV2/fpn/bottom_up_Conv2d_21/weights',
        'MobilenetV2/fpn/smoothing_1/weights',
        'MobilenetV2/fpn/smoothing_2/weights',
        'MobilenetV2/fpn/projection_1/weights',
        'MobilenetV2/fpn/projection_2/weights',
        'MobilenetV2/fpn/projection_3/weights',
    ])

    g = tf.Graph()
    with g.as_default():
      preprocessed_inputs = tf.placeholder(tf.float32, (4, None, None, 3))
      feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                         pad_to_multiple)
      feature_extractor.extract_features(preprocessed_inputs)
      actual_variable_set = set([
          var.op.name
          for var in g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
      ])
      variable_intersection = expected_feature_maps_variables.intersection(
          actual_variable_set)
      self.assertSetEqual(expected_feature_maps_variables,
                          variable_intersection)
# Run the tests when this file is executed directly.
if __name__ == '__main__':
  tf.test.main()
research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py
0 → 100644
View file @
27b4acd4
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSDFeatureExtractor for MobilenetV2 features."""
import
tensorflow
as
tf
from
object_detection.meta_architectures
import
ssd_meta_arch
from
object_detection.models
import
feature_map_generators
from
object_detection.models.keras_applications
import
mobilenet_v2
from
object_detection.utils
import
ops
from
object_detection.utils
import
shape_utils
class SSDMobileNetV2KerasFeatureExtractor(
    ssd_meta_arch.SSDKerasFeatureExtractor):
  """SSD Feature Extractor using MobilenetV2 features."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams,
               freeze_batchnorm,
               inplace_batchnorm_update,
               use_explicit_padding=False,
               use_depthwise=False,
               override_base_feature_extractor_hyperparams=False,
               name=None):
    """MobileNetV2 Feature Extractor for SSD Models.

    Mobilenet v2 (experimental), designed by sandler@. More details can be found
    in //knowledge/cerebra/brain/compression/mobilenet/mobilenet_experimental.py

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor (Functions
        as a width multiplier for the mobilenet_v2 network itself).
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams: `hyperparams_builder.KerasLayerHyperparams` object
        containing convolution hyperparameters for the layers added on top of
        the base feature extractor.
      freeze_batchnorm: Whether to freeze batch norm parameters during
        training or not. When training with a small batch size (e.g. 1), it is
        desirable to freeze batch norm update and use pretrained batch norm
        params.
      inplace_batchnorm_update: Whether to update batch norm moving average
        values inplace. When this is false train op must add a control
        dependency on tf.graphkeys.UPDATE_OPS collection in order to update
        batch norm statistics.
      use_explicit_padding: Whether to use explicit padding when extracting
        features. Default is False.
      use_depthwise: Whether to use depthwise convolutions. Default is False.
      override_base_feature_extractor_hyperparams: Whether to override
        hyperparameters of the base feature extractor with the one from
        `conv_hyperparams_fn`.
      name: A string name scope to assign to the model. If 'None', Keras
        will auto-generate one from the class name.
    """
    super(SSDMobileNetV2KerasFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams=conv_hyperparams,
        freeze_batchnorm=freeze_batchnorm,
        inplace_batchnorm_update=inplace_batchnorm_update,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams,
        name=name)
    # SSD feature-map layout: two maps taken from the backbone
    # ('layer_15/expansion_output' and 'layer_19'), followed by four extra
    # maps (512/256/256/128 channels) generated on top of it. -1 means
    # "use the backbone layer's own depth".
    feature_map_layout = {
        'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '',
                       ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_depthwise': self._use_depthwise,
        'use_explicit_padding': self._use_explicit_padding,
    }

    with tf.name_scope('MobilenetV2'):
      # Build the full classifier backbone (without the top), then wrap the
      # two endpoints we need into a dedicated Keras Model.
      full_mobilenet_v2 = mobilenet_v2.mobilenet_v2(
          batchnorm_training=(is_training and not freeze_batchnorm),
          conv_hyperparams=(conv_hyperparams
                            if self._override_base_feature_extractor_hyperparams
                            else None),
          weights=None,
          use_explicit_padding=use_explicit_padding,
          alpha=self._depth_multiplier,
          min_depth=self._min_depth,
          include_top=False)
      conv2d_11_pointwise = full_mobilenet_v2.get_layer(
          name='block_13_expand_relu').output
      conv2d_13_pointwise = full_mobilenet_v2.get_layer(name='out_relu').output
      self.mobilenet_v2 = tf.keras.Model(
          inputs=full_mobilenet_v2.inputs,
          outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
      self.feature_map_generator = (
          feature_map_generators.KerasMultiResolutionFeatureMaps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              is_training=is_training,
              conv_hyperparams=conv_hyperparams,
              freeze_batchnorm=freeze_batchnorm,
              name='FeatureMaps'))

  def preprocess(self, resized_inputs):
    """SSD preprocessing.

    Maps pixel values to the range [-1, 1].

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

  def _extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    image_features = self.mobilenet_v2(
        ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))
    feature_maps = self.feature_map_generator({
        'layer_15/expansion_output': image_features[0],
        'layer_19': image_features[1]})
    # Materialize as a list: on Python 3, dict.values() is a view object, not
    # the indexable "list of tensors" this method's contract promises. On
    # Python 2 this is a no-op copy, so behavior is backward compatible.
    return list(feature_maps.values())
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py
View file @
27b4acd4
...
@@ -43,6 +43,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -43,6 +43,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
fpn_scope_name
,
fpn_scope_name
,
fpn_min_level
=
3
,
fpn_min_level
=
3
,
fpn_max_level
=
7
,
fpn_max_level
=
7
,
additional_layer_depth
=
256
,
reuse_weights
=
None
,
reuse_weights
=
None
,
use_explicit_padding
=
False
,
use_explicit_padding
=
False
,
use_depthwise
=
False
,
use_depthwise
=
False
,
...
@@ -72,6 +73,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -72,6 +73,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
maps in the backbone network, additional feature maps are created by
maps in the backbone network, additional feature maps are created by
applying stride 2 convolutions until we get the desired number of fpn
applying stride 2 convolutions until we get the desired number of fpn
levels.
levels.
additional_layer_depth: additional feature map layer channel depth.
reuse_weights: Whether to reuse variables. Default is None.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
features. Default is False. UNUSED currently.
...
@@ -104,6 +106,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -104,6 +106,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
self
.
_fpn_scope_name
=
fpn_scope_name
self
.
_fpn_scope_name
=
fpn_scope_name
self
.
_fpn_min_level
=
fpn_min_level
self
.
_fpn_min_level
=
fpn_min_level
self
.
_fpn_max_level
=
fpn_max_level
self
.
_fpn_max_level
=
fpn_max_level
self
.
_additional_layer_depth
=
additional_layer_depth
def
preprocess
(
self
,
resized_inputs
):
def
preprocess
(
self
,
resized_inputs
):
"""SSD preprocessing.
"""SSD preprocessing.
...
@@ -177,7 +180,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -177,7 +180,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
feature_block_list
.
append
(
'block{}'
.
format
(
level
-
1
))
feature_block_list
.
append
(
'block{}'
.
format
(
level
-
1
))
fpn_features
=
feature_map_generators
.
fpn_top_down_feature_maps
(
fpn_features
=
feature_map_generators
.
fpn_top_down_feature_maps
(
[(
key
,
image_features
[
key
])
for
key
in
feature_block_list
],
[(
key
,
image_features
[
key
])
for
key
in
feature_block_list
],
depth
=
256
)
depth
=
self
.
_additional_layer_depth
)
feature_maps
=
[]
feature_maps
=
[]
for
level
in
range
(
self
.
_fpn_min_level
,
base_fpn_max_level
+
1
):
for
level
in
range
(
self
.
_fpn_min_level
,
base_fpn_max_level
+
1
):
feature_maps
.
append
(
feature_maps
.
append
(
...
@@ -188,7 +191,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -188,7 +191,7 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
for
i
in
range
(
base_fpn_max_level
,
self
.
_fpn_max_level
):
for
i
in
range
(
base_fpn_max_level
,
self
.
_fpn_max_level
):
last_feature_map
=
slim
.
conv2d
(
last_feature_map
=
slim
.
conv2d
(
last_feature_map
,
last_feature_map
,
num_outputs
=
256
,
num_outputs
=
self
.
_additional_layer_depth
,
kernel_size
=
[
3
,
3
],
kernel_size
=
[
3
,
3
],
stride
=
2
,
stride
=
2
,
padding
=
'SAME'
,
padding
=
'SAME'
,
...
@@ -208,6 +211,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...
@@ -208,6 +211,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
conv_hyperparams_fn
,
conv_hyperparams_fn
,
fpn_min_level
=
3
,
fpn_min_level
=
3
,
fpn_max_level
=
7
,
fpn_max_level
=
7
,
additional_layer_depth
=
256
,
reuse_weights
=
None
,
reuse_weights
=
None
,
use_explicit_padding
=
False
,
use_explicit_padding
=
False
,
use_depthwise
=
False
,
use_depthwise
=
False
,
...
@@ -226,6 +230,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...
@@ -226,6 +230,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
base feature extractor.
base feature extractor.
fpn_min_level: the minimum level in feature pyramid networks.
fpn_min_level: the minimum level in feature pyramid networks.
fpn_max_level: the maximum level in feature pyramid networks.
fpn_max_level: the maximum level in feature pyramid networks.
additional_layer_depth: additional feature map layer channel depth.
reuse_weights: Whether to reuse variables. Default is None.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
features. Default is False. UNUSED currently.
...
@@ -245,6 +250,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...
@@ -245,6 +250,7 @@ class SSDResnet50V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
'fpn'
,
'fpn'
,
fpn_min_level
,
fpn_min_level
,
fpn_max_level
,
fpn_max_level
,
additional_layer_depth
,
reuse_weights
=
reuse_weights
,
reuse_weights
=
reuse_weights
,
use_explicit_padding
=
use_explicit_padding
,
use_explicit_padding
=
use_explicit_padding
,
use_depthwise
=
use_depthwise
,
use_depthwise
=
use_depthwise
,
...
@@ -263,6 +269,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...
@@ -263,6 +269,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
conv_hyperparams_fn
,
conv_hyperparams_fn
,
fpn_min_level
=
3
,
fpn_min_level
=
3
,
fpn_max_level
=
7
,
fpn_max_level
=
7
,
additional_layer_depth
=
256
,
reuse_weights
=
None
,
reuse_weights
=
None
,
use_explicit_padding
=
False
,
use_explicit_padding
=
False
,
use_depthwise
=
False
,
use_depthwise
=
False
,
...
@@ -281,6 +288,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...
@@ -281,6 +288,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
base feature extractor.
base feature extractor.
fpn_min_level: the minimum level in feature pyramid networks.
fpn_min_level: the minimum level in feature pyramid networks.
fpn_max_level: the maximum level in feature pyramid networks.
fpn_max_level: the maximum level in feature pyramid networks.
additional_layer_depth: additional feature map layer channel depth.
reuse_weights: Whether to reuse variables. Default is None.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
features. Default is False. UNUSED currently.
...
@@ -300,6 +308,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...
@@ -300,6 +308,7 @@ class SSDResnet101V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
'fpn'
,
'fpn'
,
fpn_min_level
,
fpn_min_level
,
fpn_max_level
,
fpn_max_level
,
additional_layer_depth
,
reuse_weights
=
reuse_weights
,
reuse_weights
=
reuse_weights
,
use_explicit_padding
=
use_explicit_padding
,
use_explicit_padding
=
use_explicit_padding
,
use_depthwise
=
use_depthwise
,
use_depthwise
=
use_depthwise
,
...
@@ -318,6 +327,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...
@@ -318,6 +327,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
conv_hyperparams_fn
,
conv_hyperparams_fn
,
fpn_min_level
=
3
,
fpn_min_level
=
3
,
fpn_max_level
=
7
,
fpn_max_level
=
7
,
additional_layer_depth
=
256
,
reuse_weights
=
None
,
reuse_weights
=
None
,
use_explicit_padding
=
False
,
use_explicit_padding
=
False
,
use_depthwise
=
False
,
use_depthwise
=
False
,
...
@@ -336,6 +346,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...
@@ -336,6 +346,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
base feature extractor.
base feature extractor.
fpn_min_level: the minimum level in feature pyramid networks.
fpn_min_level: the minimum level in feature pyramid networks.
fpn_max_level: the maximum level in feature pyramid networks.
fpn_max_level: the maximum level in feature pyramid networks.
additional_layer_depth: additional feature map layer channel depth.
reuse_weights: Whether to reuse variables. Default is None.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False. UNUSED currently.
features. Default is False. UNUSED currently.
...
@@ -355,6 +366,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
...
@@ -355,6 +366,7 @@ class SSDResnet152V1FpnFeatureExtractor(_SSDResnetV1FpnFeatureExtractor):
'fpn'
,
'fpn'
,
fpn_min_level
,
fpn_min_level
,
fpn_max_level
,
fpn_max_level
,
additional_layer_depth
,
reuse_weights
=
reuse_weights
,
reuse_weights
=
reuse_weights
,
use_explicit_padding
=
use_explicit_padding
,
use_explicit_padding
=
use_explicit_padding
,
use_depthwise
=
use_depthwise
,
use_depthwise
=
use_depthwise
,
...
...
research/object_detection/object_detection_tutorial.ipynb
View file @
27b4acd4
...
@@ -36,7 +36,6 @@
...
@@ -36,7 +36,6 @@
},
},
"outputs": [],
"outputs": [],
"source": [
"source": [
"from distutils.version import StrictVersion\n",
"import numpy as np\n",
"import numpy as np\n",
"import os\n",
"import os\n",
"import six.moves.urllib as urllib\n",
"import six.moves.urllib as urllib\n",
...
@@ -45,6 +44,7 @@
...
@@ -45,6 +44,7 @@
"import tensorflow as tf\n",
"import tensorflow as tf\n",
"import zipfile\n",
"import zipfile\n",
"\n",
"\n",
"from distutils.version import StrictVersion\n",
"from collections import defaultdict\n",
"from collections import defaultdict\n",
"from io import StringIO\n",
"from io import StringIO\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib import pyplot as plt\n",
...
@@ -166,9 +166,7 @@
...
@@ -166,9 +166,7 @@
"PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n",
"PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n",
"\n",
"\n",
"# List of the strings that is used to add correct label for each box.\n",
"# List of the strings that is used to add correct label for each box.\n",
"PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')\n",
"PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')"
"\n",
"NUM_CLASSES = 90"
]
]
},
},
{
{
...
@@ -265,9 +263,7 @@
...
@@ -265,9 +263,7 @@
},
},
"outputs": [],
"outputs": [],
"source": [
"source": [
"label_map = label_map_util.load_labelmap(PATH_TO_LABELS)\n",
"category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)"
"categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)\n",
"category_index = label_map_util.create_category_index(categories)"
]
]
},
},
{
{
...
...
research/object_detection/predictors/convolutional_box_predictor.py
View file @
27b4acd4
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
# ==============================================================================
# ==============================================================================
"""Convolutional Box Predictors with and without weight sharing."""
"""Convolutional Box Predictors with and without weight sharing."""
import
functools
import
tensorflow
as
tf
import
tensorflow
as
tf
from
object_detection.core
import
box_predictor
from
object_detection.core
import
box_predictor
from
object_detection.utils
import
static_shape
from
object_detection.utils
import
static_shape
...
@@ -163,7 +164,7 @@ class ConvolutionalBoxPredictor(box_predictor.BoxPredictor):
...
@@ -163,7 +164,7 @@ class ConvolutionalBoxPredictor(box_predictor.BoxPredictor):
else
:
else
:
head_obj
=
self
.
_other_heads
[
head_name
]
head_obj
=
self
.
_other_heads
[
head_name
]
prediction
=
head_obj
.
predict
(
prediction
=
head_obj
.
predict
(
features
=
image_feature
,
features
=
net
,
num_predictions_per_location
=
num_predictions_per_location
)
num_predictions_per_location
=
num_predictions_per_location
)
predictions
[
head_name
].
append
(
prediction
)
predictions
[
head_name
].
append
(
prediction
)
return
predictions
return
predictions
...
@@ -203,7 +204,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
...
@@ -203,7 +204,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
num_layers_before_predictor
,
num_layers_before_predictor
,
kernel_size
=
3
,
kernel_size
=
3
,
apply_batch_norm
=
False
,
apply_batch_norm
=
False
,
share_prediction_tower
=
False
):
share_prediction_tower
=
False
,
use_depthwise
=
False
):
"""Constructor.
"""Constructor.
Args:
Args:
...
@@ -226,6 +228,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
...
@@ -226,6 +228,8 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
this predictor.
this predictor.
share_prediction_tower: Whether to share the multi-layer tower between box
share_prediction_tower: Whether to share the multi-layer tower between box
prediction and class prediction heads.
prediction and class prediction heads.
use_depthwise: Whether to use depthwise separable conv2d instead of
regular conv2d.
"""
"""
super
(
WeightSharedConvolutionalBoxPredictor
,
self
).
__init__
(
is_training
,
super
(
WeightSharedConvolutionalBoxPredictor
,
self
).
__init__
(
is_training
,
num_classes
)
num_classes
)
...
@@ -238,6 +242,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
...
@@ -238,6 +242,7 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
self
.
_kernel_size
=
kernel_size
self
.
_kernel_size
=
kernel_size
self
.
_apply_batch_norm
=
apply_batch_norm
self
.
_apply_batch_norm
=
apply_batch_norm
self
.
_share_prediction_tower
=
share_prediction_tower
self
.
_share_prediction_tower
=
share_prediction_tower
self
.
_use_depthwise
=
use_depthwise
@
property
@
property
def
num_classes
(
self
):
def
num_classes
(
self
):
...
@@ -270,7 +275,11 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
...
@@ -270,7 +275,11 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.BoxPredictor):
inserted_layer_counter
):
inserted_layer_counter
):
net
=
image_feature
net
=
image_feature
for
i
in
range
(
self
.
_num_layers_before_predictor
):
for
i
in
range
(
self
.
_num_layers_before_predictor
):
net
=
slim
.
conv2d
(
if
self
.
_use_depthwise
:
conv_op
=
functools
.
partial
(
slim
.
separable_conv2d
,
depth_multiplier
=
1
)
else
:
conv_op
=
slim
.
conv2d
net
=
conv_op
(
net
,
net
,
self
.
_depth
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
self
.
_depth
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
stride
=
1
,
stride
=
1
,
...
...
research/object_detection/predictors/convolutional_box_predictor_test.py
View file @
27b4acd4
...
@@ -234,6 +234,40 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
...
@@ -234,6 +234,40 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
'BoxPredictor/ClassPredictor/weights'
])
'BoxPredictor/ClassPredictor/weights'
])
self
.
assertEqual
(
expected_variable_set
,
actual_variable_set
)
self
.
assertEqual
(
expected_variable_set
,
actual_variable_set
)
def
test_no_dangling_outputs
(
self
):
image_features
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
shape
=
[
4
,
None
,
None
,
64
])
conv_box_predictor
=
(
box_predictor_builder
.
build_convolutional_box_predictor
(
is_training
=
False
,
num_classes
=
0
,
conv_hyperparams_fn
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
,
use_dropout
=
True
,
use_depthwise
=
True
))
box_predictions
=
conv_box_predictor
.
predict
(
[
image_features
],
num_predictions_per_location
=
[
5
],
scope
=
'BoxPredictor'
)
tf
.
concat
(
box_predictions
[
box_predictor
.
BOX_ENCODINGS
],
axis
=
1
)
tf
.
concat
(
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
],
axis
=
1
)
bad_dangling_ops
=
[]
types_safe_to_dangle
=
set
([
'Assign'
,
'Mul'
,
'Const'
])
for
op
in
tf
.
get_default_graph
().
get_operations
():
if
(
not
op
.
outputs
)
or
(
not
op
.
outputs
[
0
].
consumers
()):
if
'BoxPredictor'
in
op
.
name
:
if
op
.
type
not
in
types_safe_to_dangle
:
bad_dangling_ops
.
append
(
op
)
self
.
assertEqual
(
bad_dangling_ops
,
[])
class
WeightSharedConvolutionalBoxPredictorTest
(
test_case
.
TestCase
):
class
WeightSharedConvolutionalBoxPredictorTest
(
test_case
.
TestCase
):
...
@@ -545,6 +579,79 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
...
@@ -545,6 +579,79 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
'ClassPredictor/biases'
)])
'ClassPredictor/biases'
)])
self
.
assertEqual
(
expected_variable_set
,
actual_variable_set
)
self
.
assertEqual
(
expected_variable_set
,
actual_variable_set
)
def
test_predictions_multiple_feature_maps_share_weights_with_depthwise
(
self
):
num_classes_without_background
=
6
def
graph_fn
(
image_features1
,
image_features2
):
conv_box_predictor
=
(
box_predictor_builder
.
build_weight_shared_convolutional_box_predictor
(
is_training
=
False
,
num_classes
=
num_classes_without_background
,
conv_hyperparams_fn
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
depth
=
32
,
num_layers_before_predictor
=
2
,
box_code_size
=
4
,
apply_batch_norm
=
False
,
use_depthwise
=
True
))
box_predictions
=
conv_box_predictor
.
predict
(
[
image_features1
,
image_features2
],
num_predictions_per_location
=
[
5
,
5
],
scope
=
'BoxPredictor'
)
box_encodings
=
tf
.
concat
(
box_predictions
[
box_predictor
.
BOX_ENCODINGS
],
axis
=
1
)
class_predictions_with_background
=
tf
.
concat
(
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
],
axis
=
1
)
return
(
box_encodings
,
class_predictions_with_background
)
with
self
.
test_session
(
graph
=
tf
.
Graph
()):
graph_fn
(
tf
.
random_uniform
([
4
,
32
,
32
,
3
],
dtype
=
tf
.
float32
),
tf
.
random_uniform
([
4
,
16
,
16
,
3
],
dtype
=
tf
.
float32
))
actual_variable_set
=
set
(
[
var
.
op
.
name
for
var
in
tf
.
trainable_variables
()])
expected_variable_set
=
set
([
# Box prediction tower
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/depthwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/pointwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_0/biases'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/depthwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/pointwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictionTower/conv2d_1/biases'
),
# Box prediction head
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/depthwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/pointwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'BoxPredictor/biases'
),
# Class prediction tower
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/depthwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/pointwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_0/biases'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/depthwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/pointwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictionTower/conv2d_1/biases'
),
# Class prediction head
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/depthwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/pointwise_weights'
),
(
'BoxPredictor/WeightSharedConvolutionalBoxPredictor/'
'ClassPredictor/biases'
)])
self
.
assertEqual
(
expected_variable_set
,
actual_variable_set
)
def
test_no_batchnorm_params_when_batchnorm_is_not_configured
(
self
):
def
test_no_batchnorm_params_when_batchnorm_is_not_configured
(
self
):
num_classes_without_background
=
6
num_classes_without_background
=
6
def
graph_fn
(
image_features1
,
image_features2
):
def
graph_fn
(
image_features1
,
image_features2
):
...
...
research/object_detection/predictors/convolutional_keras_box_predictor.py
0 → 100644
View file @
27b4acd4
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Convolutional Box Predictors with and without weight sharing."""
import
collections
import
tensorflow
as
tf
from
object_detection.core
import
box_predictor
from
object_detection.utils
import
static_shape
keras
=
tf
.
keras
.
layers
BOX_ENCODINGS
=
box_predictor
.
BOX_ENCODINGS
CLASS_PREDICTIONS_WITH_BACKGROUND
=
(
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
)
MASK_PREDICTIONS
=
box_predictor
.
MASK_PREDICTIONS
class
_NoopVariableScope
(
object
):
"""A dummy class that does not push any scope."""
def
__enter__
(
self
):
return
None
def
__exit__
(
self
,
exc_type
,
exc_value
,
traceback
):
return
False
class
ConvolutionalBoxPredictor
(
box_predictor
.
KerasBoxPredictor
):
"""Convolutional Keras Box Predictor.
Optionally add an intermediate 1x1 convolutional layer after features and
predict in parallel branches box_encodings and
class_predictions_with_background.
Currently this box predictor assumes that predictions are "shared" across
classes --- that is each anchor makes box predictions which do not depend
on class.
"""
def
__init__
(
self
,
is_training
,
num_classes
,
box_prediction_heads
,
class_prediction_heads
,
other_heads
,
conv_hyperparams
,
num_layers_before_predictor
,
min_depth
,
max_depth
,
freeze_batchnorm
,
inplace_batchnorm_update
,
name
=
None
):
"""Constructor.
Args:
is_training: Indicates whether the BoxPredictor is in training mode.
num_classes: number of classes. Note that num_classes *does not*
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
box_prediction_heads: A list of heads that predict the boxes.
class_prediction_heads: A list of heads that predict the classes.
other_heads: A dictionary mapping head names to lists of convolutional
heads.
conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
containing hyperparameters for convolution ops.
num_layers_before_predictor: Number of the additional conv layers before
the predictor.
min_depth: Minimum feature depth prior to predicting box encodings
and class predictions.
max_depth: Maximum feature depth prior to predicting box encodings
and class predictions. If max_depth is set to 0, no additional
feature map will be inserted before location and class predictions.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
name: A string name scope to assign to the model. If `None`, Keras
will auto-generate one from the class name.
Raises:
ValueError: if min_depth > max_depth.
"""
super
(
ConvolutionalBoxPredictor
,
self
).
__init__
(
is_training
,
num_classes
,
freeze_batchnorm
=
freeze_batchnorm
,
inplace_batchnorm_update
=
inplace_batchnorm_update
,
name
=
name
)
if
min_depth
>
max_depth
:
raise
ValueError
(
'min_depth should be less than or equal to max_depth'
)
if
len
(
box_prediction_heads
)
!=
len
(
class_prediction_heads
):
raise
ValueError
(
'All lists of heads must be the same length.'
)
for
other_head_list
in
other_heads
.
values
():
if
len
(
box_prediction_heads
)
!=
len
(
other_head_list
):
raise
ValueError
(
'All lists of heads must be the same length.'
)
self
.
_prediction_heads
=
{
BOX_ENCODINGS
:
box_prediction_heads
,
CLASS_PREDICTIONS_WITH_BACKGROUND
:
class_prediction_heads
,
}
if
other_heads
:
self
.
_prediction_heads
.
update
(
other_heads
)
self
.
_conv_hyperparams
=
conv_hyperparams
self
.
_min_depth
=
min_depth
self
.
_max_depth
=
max_depth
self
.
_num_layers_before_predictor
=
num_layers_before_predictor
self
.
_shared_nets
=
[]
def
build
(
self
,
input_shapes
):
"""Creates the variables of the layer."""
if
len
(
input_shapes
)
!=
len
(
self
.
_prediction_heads
[
BOX_ENCODINGS
]):
raise
ValueError
(
'This box predictor was constructed with %d heads,'
'but there are %d inputs.'
%
(
len
(
self
.
_prediction_heads
[
BOX_ENCODINGS
]),
len
(
input_shapes
)))
for
stack_index
,
input_shape
in
enumerate
(
input_shapes
):
net
=
tf
.
keras
.
Sequential
(
name
=
'PreHeadConvolutions_%d'
%
stack_index
)
self
.
_shared_nets
.
append
(
net
)
# Add additional conv layers before the class predictor.
features_depth
=
static_shape
.
get_depth
(
input_shape
)
depth
=
max
(
min
(
features_depth
,
self
.
_max_depth
),
self
.
_min_depth
)
tf
.
logging
.
info
(
'depth of additional conv before box predictor: {}'
.
format
(
depth
))
if
depth
>
0
and
self
.
_num_layers_before_predictor
>
0
:
for
i
in
range
(
self
.
_num_layers_before_predictor
):
net
.
add
(
keras
.
Conv2D
(
depth
,
[
1
,
1
],
name
=
'Conv2d_%d_1x1_%d'
%
(
i
,
depth
),
padding
=
'SAME'
,
**
self
.
_conv_hyperparams
.
params
()))
net
.
add
(
self
.
_conv_hyperparams
.
build_batch_norm
(
training
=
(
self
.
_is_training
and
not
self
.
_freeze_batchnorm
),
name
=
'Conv2d_%d_1x1_%d_norm'
%
(
i
,
depth
)))
net
.
add
(
self
.
_conv_hyperparams
.
build_activation_layer
(
name
=
'Conv2d_%d_1x1_%d_activation'
%
(
i
,
depth
),
))
self
.
built
=
True
def
_predict
(
self
,
image_features
):
"""Computes encoded object locations and corresponding confidences.
Args:
image_features: A list of float tensors of shape [batch_size, height_i,
width_i, channels_i] containing features for a batch of images.
Returns:
box_encodings: A list of float tensors of shape
[batch_size, num_anchors_i, q, code_size] representing the location of
the objects, where q is 1 or the number of classes. Each entry in the
list corresponds to a feature map in the input `image_features` list.
class_predictions_with_background: A list of float tensors of shape
[batch_size, num_anchors_i, num_classes + 1] representing the class
predictions for the proposals. Each entry in the list corresponds to a
feature map in the input `image_features` list.
"""
predictions
=
collections
.
defaultdict
(
list
)
for
(
index
,
image_feature
)
in
enumerate
(
image_features
):
# Apply shared conv layers before the head predictors.
net
=
self
.
_shared_nets
[
index
](
image_feature
)
for
head_name
in
self
.
_prediction_heads
:
head_obj
=
self
.
_prediction_heads
[
head_name
][
index
]
prediction
=
head_obj
(
net
)
predictions
[
head_name
].
append
(
prediction
)
return
predictions
research/object_detection/predictors/convolutional_keras_box_predictor_test.py
0 → 100644
View file @
27b4acd4
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.predictors.convolutional_keras_box_predictor."""
import
numpy
as
np
import
tensorflow
as
tf
from
google.protobuf
import
text_format
from
object_detection.builders
import
box_predictor_builder
from
object_detection.builders
import
hyperparams_builder
from
object_detection.predictors
import
convolutional_keras_box_predictor
as
box_predictor
from
object_detection.protos
import
hyperparams_pb2
from
object_detection.utils
import
test_case
class
ConvolutionalKerasBoxPredictorTest
(
test_case
.
TestCase
):
def
_build_conv_hyperparams
(
self
):
conv_hyperparams
=
hyperparams_pb2
.
Hyperparams
()
conv_hyperparams_text_proto
=
"""
activation: RELU_6
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
text_format
.
Merge
(
conv_hyperparams_text_proto
,
conv_hyperparams
)
return
hyperparams_builder
.
KerasLayerHyperparams
(
conv_hyperparams
)
def
test_get_boxes_for_five_aspect_ratios_per_location
(
self
):
def
graph_fn
(
image_features
):
conv_box_predictor
=
(
box_predictor_builder
.
build_convolutional_keras_box_predictor
(
is_training
=
False
,
num_classes
=
0
,
conv_hyperparams
=
self
.
_build_conv_hyperparams
(),
freeze_batchnorm
=
False
,
inplace_batchnorm_update
=
False
,
num_predictions_per_location_list
=
[
5
],
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
))
box_predictions
=
conv_box_predictor
([
image_features
])
box_encodings
=
tf
.
concat
(
box_predictions
[
box_predictor
.
BOX_ENCODINGS
],
axis
=
1
)
objectness_predictions
=
tf
.
concat
(
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
],
axis
=
1
)
return
(
box_encodings
,
objectness_predictions
)
image_features
=
np
.
random
.
rand
(
4
,
8
,
8
,
64
).
astype
(
np
.
float32
)
(
box_encodings
,
objectness_predictions
)
=
self
.
execute
(
graph_fn
,
[
image_features
])
self
.
assertAllEqual
(
box_encodings
.
shape
,
[
4
,
320
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions
.
shape
,
[
4
,
320
,
1
])
def
test_get_boxes_for_one_aspect_ratio_per_location
(
self
):
def
graph_fn
(
image_features
):
conv_box_predictor
=
(
box_predictor_builder
.
build_convolutional_keras_box_predictor
(
is_training
=
False
,
num_classes
=
0
,
conv_hyperparams
=
self
.
_build_conv_hyperparams
(),
freeze_batchnorm
=
False
,
inplace_batchnorm_update
=
False
,
num_predictions_per_location_list
=
[
1
],
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
))
box_predictions
=
conv_box_predictor
([
image_features
])
box_encodings
=
tf
.
concat
(
box_predictions
[
box_predictor
.
BOX_ENCODINGS
],
axis
=
1
)
objectness_predictions
=
tf
.
concat
(
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
],
axis
=
1
)
return
(
box_encodings
,
objectness_predictions
)
image_features
=
np
.
random
.
rand
(
4
,
8
,
8
,
64
).
astype
(
np
.
float32
)
(
box_encodings
,
objectness_predictions
)
=
self
.
execute
(
graph_fn
,
[
image_features
])
self
.
assertAllEqual
(
box_encodings
.
shape
,
[
4
,
64
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions
.
shape
,
[
4
,
64
,
1
])
def
test_get_multi_class_predictions_for_five_aspect_ratios_per_location
(
self
):
num_classes_without_background
=
6
image_features
=
np
.
random
.
rand
(
4
,
8
,
8
,
64
).
astype
(
np
.
float32
)
def
graph_fn
(
image_features
):
conv_box_predictor
=
(
box_predictor_builder
.
build_convolutional_keras_box_predictor
(
is_training
=
False
,
num_classes
=
num_classes_without_background
,
conv_hyperparams
=
self
.
_build_conv_hyperparams
(),
freeze_batchnorm
=
False
,
inplace_batchnorm_update
=
False
,
num_predictions_per_location_list
=
[
5
],
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
))
box_predictions
=
conv_box_predictor
([
image_features
])
box_encodings
=
tf
.
concat
(
box_predictions
[
box_predictor
.
BOX_ENCODINGS
],
axis
=
1
)
class_predictions_with_background
=
tf
.
concat
(
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
],
axis
=
1
)
return
(
box_encodings
,
class_predictions_with_background
)
(
box_encodings
,
class_predictions_with_background
)
=
self
.
execute
(
graph_fn
,
[
image_features
])
self
.
assertAllEqual
(
box_encodings
.
shape
,
[
4
,
320
,
1
,
4
])
self
.
assertAllEqual
(
class_predictions_with_background
.
shape
,
[
4
,
320
,
num_classes_without_background
+
1
])
def
test_get_predictions_with_feature_maps_of_dynamic_shape
(
self
):
image_features
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
shape
=
[
4
,
None
,
None
,
64
])
conv_box_predictor
=
(
box_predictor_builder
.
build_convolutional_keras_box_predictor
(
is_training
=
False
,
num_classes
=
0
,
conv_hyperparams
=
self
.
_build_conv_hyperparams
(),
freeze_batchnorm
=
False
,
inplace_batchnorm_update
=
False
,
num_predictions_per_location_list
=
[
5
],
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
))
box_predictions
=
conv_box_predictor
([
image_features
])
box_encodings
=
tf
.
concat
(
box_predictions
[
box_predictor
.
BOX_ENCODINGS
],
axis
=
1
)
objectness_predictions
=
tf
.
concat
(
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
],
axis
=
1
)
init_op
=
tf
.
global_variables_initializer
()
resolution
=
32
expected_num_anchors
=
resolution
*
resolution
*
5
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
(
box_encodings_shape
,
objectness_predictions_shape
)
=
sess
.
run
(
[
tf
.
shape
(
box_encodings
),
tf
.
shape
(
objectness_predictions
)],
feed_dict
=
{
image_features
:
np
.
random
.
rand
(
4
,
resolution
,
resolution
,
64
)})
actual_variable_set
=
set
(
[
var
.
op
.
name
for
var
in
tf
.
trainable_variables
()])
self
.
assertAllEqual
(
box_encodings_shape
,
[
4
,
expected_num_anchors
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions_shape
,
[
4
,
expected_num_anchors
,
1
])
expected_variable_set
=
set
([
'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/bias'
,
'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/kernel'
,
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias'
,
'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel'
,
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias'
,
'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/kernel'
])
self
.
assertEqual
(
expected_variable_set
,
actual_variable_set
)
# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
research/object_detection/predictors/heads/box_head.py
View file @
27b4acd4
...
@@ -19,6 +19,7 @@ Contains Box prediction head classes for different meta architectures.
...
@@ -19,6 +19,7 @@ Contains Box prediction head classes for different meta architectures.
All the box prediction heads have a predict function that receives the
All the box prediction heads have a predict function that receives the
`features` as the first argument and returns `box_encodings`.
`features` as the first argument and returns `box_encodings`.
"""
"""
import
functools
import
tensorflow
as
tf
import
tensorflow
as
tf
from
object_detection.predictors.heads
import
head
from
object_detection.predictors.heads
import
head
...
@@ -196,18 +197,22 @@ class WeightSharedConvolutionalBoxHead(head.Head):
...
@@ -196,18 +197,22 @@ class WeightSharedConvolutionalBoxHead(head.Head):
def
__init__
(
self
,
def
__init__
(
self
,
box_code_size
,
box_code_size
,
kernel_size
=
3
,
kernel_size
=
3
,
class_prediction_bias_init
=
0.0
):
use_depthwise
=
False
,
box_encodings_clip_range
=
None
):
"""Constructor.
"""Constructor.
Args:
Args:
box_code_size: Size of encoding for each box.
box_code_size: Size of encoding for each box.
kernel_size: Size of final convolution kernel.
kernel_size: Size of final convolution kernel.
class_prediction_bias_init: constant value to initialize bias of the last
use_depthwise: Whether to use depthwise convolutions for prediction steps.
conv2d layer before class prediction.
Default is False.
box_encodings_clip_range: Min and max values for clipping box_encodings.
"""
"""
super
(
WeightSharedConvolutionalBoxHead
,
self
).
__init__
()
super
(
WeightSharedConvolutionalBoxHead
,
self
).
__init__
()
self
.
_box_code_size
=
box_code_size
self
.
_box_code_size
=
box_code_size
self
.
_kernel_size
=
kernel_size
self
.
_kernel_size
=
kernel_size
self
.
_use_depthwise
=
use_depthwise
self
.
_box_encodings_clip_range
=
box_encodings_clip_range
def
predict
(
self
,
features
,
num_predictions_per_location
):
def
predict
(
self
,
features
,
num_predictions_per_location
):
"""Predicts boxes.
"""Predicts boxes.
...
@@ -224,7 +229,11 @@ class WeightSharedConvolutionalBoxHead(head.Head):
...
@@ -224,7 +229,11 @@ class WeightSharedConvolutionalBoxHead(head.Head):
the objects.
the objects.
"""
"""
box_encodings_net
=
features
box_encodings_net
=
features
box_encodings
=
slim
.
conv2d
(
if
self
.
_use_depthwise
:
conv_op
=
functools
.
partial
(
slim
.
separable_conv2d
,
depth_multiplier
=
1
)
else
:
conv_op
=
slim
.
conv2d
box_encodings
=
conv_op
(
box_encodings_net
,
box_encodings_net
,
num_predictions_per_location
*
self
.
_box_code_size
,
num_predictions_per_location
*
self
.
_box_code_size
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
[
self
.
_kernel_size
,
self
.
_kernel_size
],
...
@@ -234,6 +243,11 @@ class WeightSharedConvolutionalBoxHead(head.Head):
...
@@ -234,6 +243,11 @@ class WeightSharedConvolutionalBoxHead(head.Head):
batch_size
=
features
.
get_shape
().
as_list
()[
0
]
batch_size
=
features
.
get_shape
().
as_list
()[
0
]
if
batch_size
is
None
:
if
batch_size
is
None
:
batch_size
=
tf
.
shape
(
features
)[
0
]
batch_size
=
tf
.
shape
(
features
)[
0
]
# Clipping the box encodings to make the inference graph TPU friendly.
if
self
.
_box_encodings_clip_range
is
not
None
:
box_encodings
=
tf
.
clip_by_value
(
box_encodings
,
self
.
_box_encodings_clip_range
.
min
,
self
.
_box_encodings_clip_range
.
max
)
box_encodings
=
tf
.
reshape
(
box_encodings
,
box_encodings
=
tf
.
reshape
(
box_encodings
,
[
batch_size
,
-
1
,
self
.
_box_code_size
])
[
batch_size
,
-
1
,
self
.
_box_code_size
])
return
box_encodings
return
box_encodings
research/object_detection/predictors/heads/class_head.py
View file @
27b4acd4
...
@@ -19,6 +19,7 @@ Contains Class prediction head classes for different meta architectures.
...
@@ -19,6 +19,7 @@ Contains Class prediction head classes for different meta architectures.
All the class prediction heads have a predict function that receives the
All the class prediction heads have a predict function that receives the
`features` as the first argument and returns class predictions with background.
`features` as the first argument and returns class predictions with background.
"""
"""
import
functools
import
tensorflow
as
tf
import
tensorflow
as
tf
from
object_detection.predictors.heads
import
head
from
object_detection.predictors.heads
import
head
...
@@ -211,7 +212,9 @@ class WeightSharedConvolutionalClassHead(head.Head):
...
@@ -211,7 +212,9 @@ class WeightSharedConvolutionalClassHead(head.Head):
kernel_size
=
3
,
kernel_size
=
3
,
class_prediction_bias_init
=
0.0
,
class_prediction_bias_init
=
0.0
,
use_dropout
=
False
,
use_dropout
=
False
,
dropout_keep_prob
=
0.8
):
dropout_keep_prob
=
0.8
,
use_depthwise
=
False
,
score_converter_fn
=
tf
.
identity
):
"""Constructor.
"""Constructor.
Args:
Args:
...
@@ -224,6 +227,10 @@ class WeightSharedConvolutionalClassHead(head.Head):
...
@@ -224,6 +227,10 @@ class WeightSharedConvolutionalClassHead(head.Head):
conv2d layer before class prediction.
conv2d layer before class prediction.
use_dropout: Whether to apply dropout to class prediction head.
use_dropout: Whether to apply dropout to class prediction head.
dropout_keep_prob: Probability of keeping activiations.
dropout_keep_prob: Probability of keeping activiations.
use_depthwise: Whether to use depthwise convolutions for prediction
steps. Default is False.
score_converter_fn: Callable elementwise nonlinearity (that takes tensors
as inputs and returns tensors).
"""
"""
super
(
WeightSharedConvolutionalClassHead
,
self
).
__init__
()
super
(
WeightSharedConvolutionalClassHead
,
self
).
__init__
()
self
.
_num_classes
=
num_classes
self
.
_num_classes
=
num_classes
...
@@ -231,6 +238,8 @@ class WeightSharedConvolutionalClassHead(head.Head):
...
@@ -231,6 +238,8 @@ class WeightSharedConvolutionalClassHead(head.Head):
self
.
_class_prediction_bias_init
=
class_prediction_bias_init
self
.
_class_prediction_bias_init
=
class_prediction_bias_init
self
.
_use_dropout
=
use_dropout
self
.
_use_dropout
=
use_dropout
self
.
_dropout_keep_prob
=
dropout_keep_prob
self
.
_dropout_keep_prob
=
dropout_keep_prob
self
.
_use_depthwise
=
use_depthwise
self
.
_score_converter_fn
=
score_converter_fn
def
predict
(
self
,
features
,
num_predictions_per_location
):
def
predict
(
self
,
features
,
num_predictions_per_location
):
"""Predicts boxes.
"""Predicts boxes.
...
@@ -252,7 +261,11 @@ class WeightSharedConvolutionalClassHead(head.Head):
...
@@ -252,7 +261,11 @@ class WeightSharedConvolutionalClassHead(head.Head):
if
self
.
_use_dropout
:
if
self
.
_use_dropout
:
class_predictions_net
=
slim
.
dropout
(
class_predictions_net
=
slim
.
dropout
(
class_predictions_net
,
keep_prob
=
self
.
_dropout_keep_prob
)
class_predictions_net
,
keep_prob
=
self
.
_dropout_keep_prob
)
class_predictions_with_background
=
slim
.
conv2d
(
if
self
.
_use_depthwise
:
conv_op
=
functools
.
partial
(
slim
.
separable_conv2d
,
depth_multiplier
=
1
)
else
:
conv_op
=
slim
.
conv2d
class_predictions_with_background
=
conv_op
(
class_predictions_net
,
class_predictions_net
,
num_predictions_per_location
*
num_class_slots
,
num_predictions_per_location
*
num_class_slots
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
[
self
.
_kernel_size
,
self
.
_kernel_size
],
...
@@ -264,6 +277,8 @@ class WeightSharedConvolutionalClassHead(head.Head):
...
@@ -264,6 +277,8 @@ class WeightSharedConvolutionalClassHead(head.Head):
batch_size
=
features
.
get_shape
().
as_list
()[
0
]
batch_size
=
features
.
get_shape
().
as_list
()[
0
]
if
batch_size
is
None
:
if
batch_size
is
None
:
batch_size
=
tf
.
shape
(
features
)[
0
]
batch_size
=
tf
.
shape
(
features
)[
0
]
class_predictions_with_background
=
self
.
_score_converter_fn
(
class_predictions_with_background
)
class_predictions_with_background
=
tf
.
reshape
(
class_predictions_with_background
=
tf
.
reshape
(
class_predictions_with_background
,
[
batch_size
,
-
1
,
num_class_slots
])
class_predictions_with_background
,
[
batch_size
,
-
1
,
num_class_slots
])
return
class_predictions_with_background
return
class_predictions_with_background
research/object_detection/predictors/heads/head.py
View file @
27b4acd4
...
@@ -36,6 +36,8 @@ Mask RCNN box predictor.
...
@@ -36,6 +36,8 @@ Mask RCNN box predictor.
"""
"""
from
abc
import
abstractmethod
from
abc
import
abstractmethod
import
tensorflow
as
tf
class
Head
(
object
):
class
Head
(
object
):
"""Mask RCNN head base class."""
"""Mask RCNN head base class."""
...
@@ -57,3 +59,23 @@ class Head(object):
...
@@ -57,3 +59,23 @@ class Head(object):
A tf.float32 tensor.
A tf.float32 tensor.
"""
"""
pass
pass
class KerasHead(tf.keras.Model):
  """Keras head base class.

  Subclasses implement `_predict`; the standard Keras `call` entry point
  simply delegates to it, so a head can be invoked as `head(features)`.
  """

  def call(self, features):
    """The Keras model call will delegate to the `_predict` method."""
    return self._predict(features)

  # NOTE(review): @abstractmethod is only enforced when the metaclass is
  # ABCMeta; tf.keras.Model does not guarantee that, so this serves mainly
  # as documentation — confirm every subclass overrides `_predict`.
  @abstractmethod
  def _predict(self, features):
    """Returns the head's predictions.

    Args:
      features: A float tensor of features.

    Returns:
      A tf.float32 tensor.
    """
    pass
research/object_detection/predictors/heads/keras_box_head.py
0 → 100644
View file @
27b4acd4
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Box Head.
Contains Box prediction head classes for different meta architectures.
All the box prediction heads have a _predict function that receives the
`features` as the first argument and returns `box_encodings`.
"""
import
tensorflow
as
tf
from
object_detection.predictors.heads
import
head
class ConvolutionalBoxHead(head.KerasHead):
  """Convolutional box prediction head."""

  def __init__(self,
               is_training,
               box_code_size,
               kernel_size,
               num_predictions_per_location,
               conv_hyperparams,
               freeze_batchnorm,
               use_depthwise=True,
               name=None):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      box_code_size: Size of encoding for each box.
      kernel_size: Size of final convolution kernel. If the
        spatial resolution of the feature map is smaller than the kernel size,
        then the kernel size is automatically set to be
        min(feature_width, feature_height).
      num_predictions_per_location: Number of box predictions to be made per
        spatial location. Int specifying number of boxes per location.
      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
        containing hyperparameters for convolution ops.
      freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
        training or not. When training with a small batch size (e.g. 1), it is
        desirable to freeze batch norm update and use pretrained batch norm
        params.
      use_depthwise: Whether to use depthwise convolutions for prediction
        steps. Default is True, matching the keyword default above.
      name: A string name scope to assign to the model. If `None`, Keras
        will auto-generate one from the class name.
    """
    super(ConvolutionalBoxHead, self).__init__(name=name)
    self._is_training = is_training
    self._box_code_size = box_code_size
    self._kernel_size = kernel_size
    self._num_predictions_per_location = num_predictions_per_location
    self._use_depthwise = use_depthwise

    self._box_encoder_layers = []
    if self._use_depthwise:
      # Depthwise kxk conv -> batchnorm -> activation -> pointwise 1x1 conv:
      # same receptive field as a single kxk conv with fewer parameters.
      self._box_encoder_layers.append(
          tf.keras.layers.DepthwiseConv2D(
              [self._kernel_size, self._kernel_size],
              padding='SAME',
              depth_multiplier=1,
              strides=1,
              dilation_rate=1,
              name='BoxEncodingPredictor_depthwise',
              **conv_hyperparams.params()))
      self._box_encoder_layers.append(
          conv_hyperparams.build_batch_norm(
              training=(is_training and not freeze_batchnorm),
              name='BoxEncodingPredictor_depthwise_batchnorm'))
      self._box_encoder_layers.append(
          conv_hyperparams.build_activation_layer(
              name='BoxEncodingPredictor_depthwise_activation'))
      self._box_encoder_layers.append(
          tf.keras.layers.Conv2D(
              num_predictions_per_location * self._box_code_size, [1, 1],
              name='BoxEncodingPredictor',
              **conv_hyperparams.params(activation=None)))
    else:
      self._box_encoder_layers.append(
          tf.keras.layers.Conv2D(
              num_predictions_per_location * self._box_code_size,
              [self._kernel_size, self._kernel_size],
              padding='SAME',
              name='BoxEncodingPredictor',
              **conv_hyperparams.params(activation=None)))

  def _predict(self, features):
    """Predicts boxes.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.

    Returns:
      box_encodings: A float tensor of shape
        [batch_size, num_anchors, q, code_size] representing the location of
        the objects, where q is 1 or the number of classes.
    """
    box_encodings = features
    for layer in self._box_encoder_layers:
      box_encodings = layer(box_encodings)
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
      # Fall back to the dynamic batch size when it is not statically known.
      batch_size = tf.shape(features)[0]
    box_encodings = tf.reshape(box_encodings,
                               [batch_size, -1, 1, self._box_code_size])
    return box_encodings
research/object_detection/predictors/heads/keras_box_head_test.py
0 → 100644
View file @
27b4acd4
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.predictors.heads.box_head."""
import
tensorflow
as
tf
from
google.protobuf
import
text_format
from
object_detection.builders
import
hyperparams_builder
from
object_detection.predictors.heads
import
keras_box_head
from
object_detection.protos
import
hyperparams_pb2
from
object_detection.utils
import
test_case
class ConvolutionalKerasBoxHeadTest(test_case.TestCase):

  def _build_conv_hyperparams(self):
    """Returns KerasLayerHyperparams built from a minimal text proto."""
    hyperparams_proto = hyperparams_pb2.Hyperparams()
    text_format.Merge(
        """
      activation: NONE
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
    """, hyperparams_proto)
    return hyperparams_builder.KerasLayerHyperparams(hyperparams_proto)

  def test_prediction_size_depthwise_false(self):
    """Checks the output shape of the non-depthwise box head."""
    box_head = keras_box_head.ConvolutionalBoxHead(
        is_training=True,
        box_code_size=4,
        kernel_size=3,
        conv_hyperparams=self._build_conv_hyperparams(),
        freeze_batchnorm=False,
        num_predictions_per_location=1,
        use_depthwise=False)
    features = tf.random_uniform(
        [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
    encodings = box_head(features)
    # 17 * 19 spatial positions, one anchor each -> 323 anchors.
    self.assertAllEqual([64, 323, 1, 4], encodings.get_shape().as_list())


# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
if __name__ == '__main__':
  tf.test.main()
research/object_detection/predictors/heads/keras_class_head.py
0 → 100644
View file @
27b4acd4
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Class Head.
Contains Class prediction head classes for different meta architectures.
All the class prediction heads have a predict function that receives the
`features` as the first argument and returns class predictions with background.
"""
import
tensorflow
as
tf
from
object_detection.predictors.heads
import
head
class ConvolutionalClassHead(head.KerasHead):
  """Convolutional class prediction head."""

  def __init__(self,
               is_training,
               num_classes,
               use_dropout,
               dropout_keep_prob,
               kernel_size,
               num_predictions_per_location,
               conv_hyperparams,
               freeze_batchnorm,
               class_prediction_bias_init=0.0,
               use_depthwise=False,
               name=None):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: Number of classes.
      use_dropout: Option to use dropout or not. Note that a single dropout
        op is applied here prior to both box and class predictions, which stands
        in contrast to the ConvolutionalBoxPredictor below.
      dropout_keep_prob: Keep probability for dropout.
        This is only used if use_dropout is True.
      kernel_size: Size of final convolution kernel. If the
        spatial resolution of the feature map is smaller than the kernel size,
        then the kernel size is automatically set to be
        min(feature_width, feature_height).
      num_predictions_per_location: Number of box predictions to be made per
        spatial location. Int specifying number of boxes per location.
      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
        containing hyperparameters for convolution ops.
      freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
        training or not. When training with a small batch size (e.g. 1), it is
        desirable to freeze batch norm update and use pretrained batch norm
        params.
      class_prediction_bias_init: constant value to initialize bias of the last
        conv2d layer before class prediction.
      use_depthwise: Whether to use depthwise convolutions for prediction
        steps. Default is False.
      name: A string name scope to assign to the model. If `None`, Keras
        will auto-generate one from the class name.
    """
    super(ConvolutionalClassHead, self).__init__(name=name)
    self._is_training = is_training
    self._num_classes = num_classes
    self._use_dropout = use_dropout
    self._dropout_keep_prob = dropout_keep_prob
    self._kernel_size = kernel_size
    self._class_prediction_bias_init = class_prediction_bias_init
    self._use_depthwise = use_depthwise
    # Add a slot for the background class.
    self._num_class_slots = self._num_classes + 1

    self._class_predictor_layers = []

    if self._use_dropout:
      self._class_predictor_layers.append(
          # The Dropout layer's `training` parameter for the call method must
          # be set implicitly by the Keras set_learning_phase. The object
          # detection training code takes care of this.
          tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob))
    if self._use_depthwise:
      self._class_predictor_layers.append(
          tf.keras.layers.DepthwiseConv2D(
              [self._kernel_size, self._kernel_size],
              padding='SAME',
              depth_multiplier=1,
              strides=1,
              dilation_rate=1,
              name='ClassPredictor_depthwise',
              **conv_hyperparams.params()))
      self._class_predictor_layers.append(
          conv_hyperparams.build_batch_norm(
              training=(is_training and not freeze_batchnorm),
              name='ClassPredictor_depthwise_batchnorm'))
      self._class_predictor_layers.append(
          conv_hyperparams.build_activation_layer(
              name='ClassPredictor_depthwise_activation'))
      self._class_predictor_layers.append(
          tf.keras.layers.Conv2D(
              num_predictions_per_location * self._num_class_slots, [1, 1],
              name='ClassPredictor',
              # Apply the configured bias init here too; previously only the
              # non-depthwise path honored class_prediction_bias_init.
              bias_initializer=tf.constant_initializer(
                  self._class_prediction_bias_init),
              **conv_hyperparams.params(activation=None)))
    else:
      self._class_predictor_layers.append(
          tf.keras.layers.Conv2D(
              num_predictions_per_location * self._num_class_slots,
              [self._kernel_size, self._kernel_size],
              padding='SAME',
              name='ClassPredictor',
              bias_initializer=tf.constant_initializer(
                  self._class_prediction_bias_init),
              **conv_hyperparams.params(activation=None)))

  def _predict(self, features):
    """Predicts class scores (with background) for each anchor.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.

    Returns:
      class_predictions_with_background: A float tensor of shape
        [batch_size, num_anchors, num_classes + 1] representing the class
        predictions for the proposals.
    """
    class_predictions_with_background = features
    for layer in self._class_predictor_layers:
      class_predictions_with_background = layer(
          class_predictions_with_background)
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
      # Fall back to the dynamic batch size when it is not statically known.
      batch_size = tf.shape(features)[0]
    class_predictions_with_background = tf.reshape(
        class_predictions_with_background,
        [batch_size, -1, self._num_class_slots])
    return class_predictions_with_background
research/object_detection/predictors/heads/keras_class_head_test.py
0 → 100644
View file @
27b4acd4
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.predictors.heads.class_head."""
import
tensorflow
as
tf
from
google.protobuf
import
text_format
from
object_detection.builders
import
hyperparams_builder
from
object_detection.predictors.heads
import
keras_class_head
from
object_detection.protos
import
hyperparams_pb2
from
object_detection.utils
import
test_case
class ConvolutionalKerasClassPredictorTest(test_case.TestCase):

  def _build_conv_hyperparams(self):
    """Returns KerasLayerHyperparams built from a minimal text proto."""
    hyperparams_proto = hyperparams_pb2.Hyperparams()
    text_format.Merge(
        """
      activation: NONE
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
    """, hyperparams_proto)
    return hyperparams_builder.KerasLayerHyperparams(hyperparams_proto)

  def test_prediction_size_depthwise_false(self):
    """Checks the output shape of the non-depthwise class head."""
    class_head = keras_class_head.ConvolutionalClassHead(
        is_training=True,
        num_classes=20,
        use_dropout=True,
        dropout_keep_prob=0.5,
        kernel_size=3,
        conv_hyperparams=self._build_conv_hyperparams(),
        freeze_batchnorm=False,
        num_predictions_per_location=1,
        use_depthwise=False)
    features = tf.random_uniform(
        [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
    predictions = class_head(features)
    # 17 * 19 spatial positions -> 323 anchors; 20 classes + background -> 21.
    self.assertAllEqual([64, 323, 21], predictions.get_shape().as_list())


# TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
if __name__ == '__main__':
  tf.test.main()
research/object_detection/predictors/heads/keras_mask_head.py
0 → 100644
View file @
27b4acd4
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keras Mask Heads.
Contains Mask prediction head classes for different meta architectures.
All the mask prediction heads have a predict function that receives the
`features` as the first argument and returns `mask_predictions`.
"""
import
tensorflow
as
tf
from
object_detection.predictors.heads
import
head
class ConvolutionalMaskHead(head.KerasHead):
  """Convolutional mask prediction head."""

  def __init__(self,
               is_training,
               num_classes,
               use_dropout,
               dropout_keep_prob,
               kernel_size,
               num_predictions_per_location,
               conv_hyperparams,
               freeze_batchnorm,
               use_depthwise=False,
               mask_height=7,
               mask_width=7,
               masks_are_class_agnostic=False,
               name=None):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: Number of classes.
      use_dropout: Option to use dropout or not. Note that a single dropout
        op is applied here prior to both box and class predictions, which stands
        in contrast to the ConvolutionalBoxPredictor below.
      dropout_keep_prob: Keep probability for dropout.
        This is only used if use_dropout is True.
      kernel_size: Size of final convolution kernel. If the
        spatial resolution of the feature map is smaller than the kernel size,
        then the kernel size is automatically set to be
        min(feature_width, feature_height).
      num_predictions_per_location: Number of box predictions to be made per
        spatial location. Int specifying number of boxes per location.
      conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
        containing hyperparameters for convolution ops.
      freeze_batchnorm: Bool. Whether to freeze batch norm parameters during
        training or not. When training with a small batch size (e.g. 1), it is
        desirable to freeze batch norm update and use pretrained batch norm
        params.
      use_depthwise: Whether to use depthwise convolutions for prediction
        steps. Default is False.
      mask_height: Desired output mask height. The default value is 7.
      mask_width: Desired output mask width. The default value is 7.
      masks_are_class_agnostic: Boolean determining if the mask-head is
        class-agnostic or not.
      name: A string name scope to assign to the model. If `None`, Keras
        will auto-generate one from the class name.
    """
    super(ConvolutionalMaskHead, self).__init__(name=name)
    self._is_training = is_training
    self._num_classes = num_classes
    self._use_dropout = use_dropout
    self._dropout_keep_prob = dropout_keep_prob
    self._kernel_size = kernel_size
    self._num_predictions_per_location = num_predictions_per_location
    self._use_depthwise = use_depthwise
    self._mask_height = mask_height
    self._mask_width = mask_width
    self._masks_are_class_agnostic = masks_are_class_agnostic

    self._mask_predictor_layers = []

    # A class-agnostic head predicts a single shared mask per anchor;
    # otherwise one mask per class is predicted.
    if self._masks_are_class_agnostic:
      self._num_masks = 1
    else:
      self._num_masks = self._num_classes
    num_mask_channels = self._num_masks * self._mask_height * self._mask_width

    if self._use_dropout:
      self._mask_predictor_layers.append(
          # The Dropout layer's `training` parameter for the call method must
          # be set implicitly by the Keras set_learning_phase. The object
          # detection training code takes care of this.
          tf.keras.layers.Dropout(rate=1.0 - self._dropout_keep_prob))
    if self._use_depthwise:
      self._mask_predictor_layers.append(
          tf.keras.layers.DepthwiseConv2D(
              [self._kernel_size, self._kernel_size],
              padding='SAME',
              depth_multiplier=1,
              strides=1,
              dilation_rate=1,
              name='MaskPredictor_depthwise',
              **conv_hyperparams.params()))
      self._mask_predictor_layers.append(
          conv_hyperparams.build_batch_norm(
              training=(is_training and not freeze_batchnorm),
              name='MaskPredictor_depthwise_batchnorm'))
      self._mask_predictor_layers.append(
          conv_hyperparams.build_activation_layer(
              name='MaskPredictor_depthwise_activation'))
      self._mask_predictor_layers.append(
          tf.keras.layers.Conv2D(
              num_predictions_per_location * num_mask_channels, [1, 1],
              name='MaskPredictor',
              **conv_hyperparams.params(activation=None)))
    else:
      self._mask_predictor_layers.append(
          tf.keras.layers.Conv2D(
              num_predictions_per_location * num_mask_channels,
              [self._kernel_size, self._kernel_size],
              padding='SAME',
              name='MaskPredictor',
              **conv_hyperparams.params(activation=None)))

  def _predict(self, features):
    """Predicts instance masks.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing image features.

    Returns:
      mask_predictions: A float tensors of shape
        [batch_size, num_anchors, num_masks, mask_height, mask_width]
        representing the mask predictions for the proposals.
    """
    mask_predictions = features
    for layer in self._mask_predictor_layers:
      mask_predictions = layer(mask_predictions)
    batch_size = features.get_shape().as_list()[0]
    if batch_size is None:
      # Fall back to the dynamic batch size when it is not statically known.
      batch_size = tf.shape(features)[0]
    mask_predictions = tf.reshape(
        mask_predictions,
        [batch_size, -1, self._num_masks, self._mask_height, self._mask_width])
    return mask_predictions
research/object_detection/predictors/heads/keras_mask_head_test.py
0 → 100644
View file @
27b4acd4
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.predictors.heads.mask_head."""
import
tensorflow
as
tf
from
google.protobuf
import
text_format
from
object_detection.builders
import
hyperparams_builder
from
object_detection.predictors.heads
import
keras_mask_head
from
object_detection.protos
import
hyperparams_pb2
from
object_detection.utils
import
test_case
class ConvolutionalMaskPredictorTest(test_case.TestCase):

  def _build_conv_hyperparams(self):
    """Returns KerasLayerHyperparams built from a minimal text proto."""
    hyperparams_proto = hyperparams_pb2.Hyperparams()
    text_format.Merge(
        """
      activation: NONE
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
    """, hyperparams_proto)
    return hyperparams_builder.KerasLayerHyperparams(hyperparams_proto)

  def _make_head(self, masks_are_class_agnostic):
    """Builds a 20-class mask head with the shared test configuration."""
    return keras_mask_head.ConvolutionalMaskHead(
        is_training=True,
        num_classes=20,
        use_dropout=True,
        dropout_keep_prob=0.5,
        kernel_size=3,
        conv_hyperparams=self._build_conv_hyperparams(),
        freeze_batchnorm=False,
        num_predictions_per_location=1,
        use_depthwise=False,
        mask_height=7,
        mask_width=7,
        masks_are_class_agnostic=masks_are_class_agnostic)

  def test_prediction_size_use_depthwise_false(self):
    """Per-class masks: one 7x7 mask for each of the 20 classes."""
    mask_head = self._make_head(masks_are_class_agnostic=False)
    features = tf.random_uniform(
        [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
    mask_predictions = mask_head(features)
    self.assertAllEqual([64, 323, 20, 7, 7],
                        mask_predictions.get_shape().as_list())

  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10

  def test_class_agnostic_prediction_size_use_depthwise_false(self):
    """Class-agnostic masks: a single shared 7x7 mask per anchor."""
    mask_head = self._make_head(masks_are_class_agnostic=True)
    features = tf.random_uniform(
        [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
    mask_predictions = mask_head(features)
    self.assertAllEqual([64, 323, 1, 7, 7],
                        mask_predictions.get_shape().as_list())

  # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10

if __name__ == '__main__':
  tf.test.main()
research/object_detection/predictors/heads/mask_head.py
View file @
27b4acd4
...
@@ -148,6 +148,7 @@ class MaskRCNNMaskHead(head.Head):
...
@@ -148,6 +148,7 @@ class MaskRCNNMaskHead(head.Head):
upsampled_features
,
upsampled_features
,
num_outputs
=
num_masks
,
num_outputs
=
num_masks
,
activation_fn
=
None
,
activation_fn
=
None
,
normalizer_fn
=
None
,
kernel_size
=
[
3
,
3
])
kernel_size
=
[
3
,
3
])
return
tf
.
expand_dims
(
return
tf
.
expand_dims
(
tf
.
transpose
(
mask_predictions
,
perm
=
[
0
,
3
,
1
,
2
]),
tf
.
transpose
(
mask_predictions
,
perm
=
[
0
,
3
,
1
,
2
]),
...
...
research/object_detection/protos/box_predictor.proto
View file @
27b4acd4
...
@@ -15,7 +15,21 @@ message BoxPredictor {
...
@@ -15,7 +15,21 @@ message BoxPredictor {
}
}
}
}
// Configuration proto for MaskHead in predictors.
// Next id: 4
message
MaskHead
{
// The height and the width of the predicted mask. Only used when
// predict_instance_masks is true.
optional
int32
mask_height
=
1
[
default
=
15
];
optional
int32
mask_width
=
2
[
default
=
15
];
// Whether to predict class agnostic masks. Only used when
// predict_instance_masks is true.
optional
bool
masks_are_class_agnostic
=
3
[
default
=
true
];
}
// Configuration proto for Convolutional box predictor.
// Configuration proto for Convolutional box predictor.
// Next id: 13
message
ConvolutionalBoxPredictor
{
message
ConvolutionalBoxPredictor
{
// Hyperparameters for convolution ops used in the box predictor.
// Hyperparameters for convolution ops used in the box predictor.
optional
Hyperparams
conv_hyperparams
=
1
;
optional
Hyperparams
conv_hyperparams
=
1
;
...
@@ -55,9 +69,13 @@ message ConvolutionalBoxPredictor {
...
@@ -55,9 +69,13 @@ message ConvolutionalBoxPredictor {
// Whether to use depthwise separable convolution for box predictor layers.
// Whether to use depthwise separable convolution for box predictor layers.
optional
bool
use_depthwise
=
11
[
default
=
false
];
optional
bool
use_depthwise
=
11
[
default
=
false
];
// Configs for a mask prediction head.
optional
MaskHead
mask_head
=
12
;
}
}
// Configuration proto for weight shared convolutional box predictor.
// Configuration proto for weight shared convolutional box predictor.
// Next id: 18
message
WeightSharedConvolutionalBoxPredictor
{
message
WeightSharedConvolutionalBoxPredictor
{
// Hyperparameters for convolution ops used in the box predictor.
// Hyperparameters for convolution ops used in the box predictor.
optional
Hyperparams
conv_hyperparams
=
1
;
optional
Hyperparams
conv_hyperparams
=
1
;
...
@@ -85,12 +103,37 @@ message WeightSharedConvolutionalBoxPredictor {
...
@@ -85,12 +103,37 @@ message WeightSharedConvolutionalBoxPredictor {
// Whether to use dropout for class prediction.
// Whether to use dropout for class prediction.
optional
bool
use_dropout
=
11
[
default
=
false
];
optional
bool
use_dropout
=
11
[
default
=
false
];
// Keep probability for dropout
// Keep probability for dropout
.
optional
float
dropout_keep_probability
=
12
[
default
=
0.8
];
optional
float
dropout_keep_probability
=
12
[
default
=
0.8
];
// Whether to share the multi-layer tower between box prediction and class
// Whether to share the multi-layer tower between box prediction and class
// prediction heads.
// prediction heads.
optional
bool
share_prediction_tower
=
13
[
default
=
false
];
optional
bool
share_prediction_tower
=
13
[
default
=
false
];
// Whether to use depthwise separable convolution for box predictor layers.
optional
bool
use_depthwise
=
14
[
default
=
false
];
// Configs for a mask prediction head.
optional
MaskHead
mask_head
=
15
;
// Enum to specify how to convert the detection scores at inference time.
enum
ScoreConverter
{
// Input scores equals output scores.
IDENTITY
=
0
;
// Applies a sigmoid on input scores.
SIGMOID
=
1
;
}
// Callable elementwise score converter at inference time.
optional
ScoreConverter
score_converter
=
16
[
default
=
IDENTITY
];
// If specified, apply clipping to box encodings.
message
BoxEncodingsClipRange
{
optional
float
min
=
1
;
optional
float
max
=
2
;
}
optional
BoxEncodingsClipRange
box_encodings_clip_range
=
17
;
}
}
// TODO(alirezafathi): Refactor the proto file to be able to configure mask rcnn
// TODO(alirezafathi): Refactor the proto file to be able to configure mask rcnn
...
...
Prev
1
…
7
8
9
10
11
12
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment