ModelZoo / ResNet50_tensorflow / Commits

Commit e00e0e13, authored Dec 03, 2018 by dreamdragon

Merge remote-tracking branch 'upstream/master'

Parents: b915db4e, 402b561b

Showing 20 changed files with 466 additions and 111 deletions (+466, -111)
research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py (+32, -29)
research/object_detection/models/ssd_pnasnet_feature_extractor.py (+175, -0)
research/object_detection/models/ssd_pnasnet_feature_extractor_test.py (+87, -0)
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py (+7, -2)
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py (+6, -5)
research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor.py (+7, -2)
research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_testbase.py (+7, -3)
research/object_detection/predictors/convolutional_keras_box_predictor.py (+19, -12)
research/object_detection/predictors/convolutional_keras_box_predictor_test.py (+2, -2)
research/object_detection/predictors/heads/class_head.py (+30, -30)
research/object_detection/predictors/heads/class_head_test.py (+6, -6)
research/object_detection/predictors/heads/keras_box_head.py (+2, -2)
research/object_detection/predictors/heads/keras_class_head.py (+7, -7)
research/object_detection/predictors/heads/keras_class_head_test.py (+2, -2)
research/object_detection/predictors/heads/keras_mask_head.py (+2, -2)
research/object_detection/predictors/heads/mask_head.py (+27, -7)
research/object_detection/predictors/heads/mask_head_test.py (+16, -0)
research/object_detection/protos/box_predictor.proto (+7, -0)
research/object_detection/protos/faster_rcnn.proto (+4, -0)
research/object_detection/protos/preprocessor.proto (+21, -0)
research/object_detection/models/ssd_mobilenet_v2_keras_feature_extractor.py

@@ -85,41 +85,44 @@ class SSDMobileNetV2KerasFeatureExtractor(
         override_base_feature_extractor_hyperparams=
         override_base_feature_extractor_hyperparams,
         name=name)
-    feature_map_layout = {
+    self._feature_map_layout = {
         'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '',
                        ''],
         'layer_depth': [-1, -1, 512, 256, 256, 128],
         'use_depthwise': self._use_depthwise,
         'use_explicit_padding': self._use_explicit_padding,
     }
-    with tf.name_scope('MobilenetV2'):
-      full_mobilenet_v2 = mobilenet_v2.mobilenet_v2(
-          batchnorm_training=(is_training and not freeze_batchnorm),
-          conv_hyperparams=(conv_hyperparams if
-                            self._override_base_feature_extractor_hyperparams
-                            else None),
-          weights=None,
-          use_explicit_padding=use_explicit_padding,
-          alpha=self._depth_multiplier,
-          min_depth=self._min_depth,
-          include_top=False)
-      conv2d_11_pointwise = full_mobilenet_v2.get_layer(
-          name='block_13_expand_relu').output
-      conv2d_13_pointwise = full_mobilenet_v2.get_layer(name='out_relu').output
-      self.mobilenet_v2 = tf.keras.Model(
-          inputs=full_mobilenet_v2.inputs,
-          outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
-      self.feature_map_generator = (
-          feature_map_generators.KerasMultiResolutionFeatureMaps(
-              feature_map_layout=feature_map_layout,
-              depth_multiplier=self._depth_multiplier,
-              min_depth=self._min_depth,
-              insert_1x1_conv=True,
-              is_training=is_training,
-              conv_hyperparams=conv_hyperparams,
-              freeze_batchnorm=freeze_batchnorm,
-              name='FeatureMaps'))
+    self.mobilenet_v2 = None
+    self.feature_map_generator = None
+
+  def build(self, input_shape):
+    full_mobilenet_v2 = mobilenet_v2.mobilenet_v2(
+        batchnorm_training=(self._is_training and not self._freeze_batchnorm),
+        conv_hyperparams=(self._conv_hyperparams if
+                          self._override_base_feature_extractor_hyperparams
+                          else None),
+        weights=None,
+        use_explicit_padding=self._use_explicit_padding,
+        alpha=self._depth_multiplier,
+        min_depth=self._min_depth,
+        include_top=False)
+    conv2d_11_pointwise = full_mobilenet_v2.get_layer(
+        name='block_13_expand_relu').output
+    conv2d_13_pointwise = full_mobilenet_v2.get_layer(name='out_relu').output
+    self.mobilenet_v2 = tf.keras.Model(
+        inputs=full_mobilenet_v2.inputs,
+        outputs=[conv2d_11_pointwise, conv2d_13_pointwise])
+    self.feature_map_generator = (
+        feature_map_generators.KerasMultiResolutionFeatureMaps(
+            feature_map_layout=self._feature_map_layout,
+            depth_multiplier=self._depth_multiplier,
+            min_depth=self._min_depth,
+            insert_1x1_conv=True,
+            is_training=self._is_training,
+            conv_hyperparams=self._conv_hyperparams,
+            freeze_batchnorm=self._freeze_batchnorm,
+            name='FeatureMaps'))
+    self.built = True

   def preprocess(self, resized_inputs):
     """SSD preprocessing.
...
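
The change above defers construction of the MobileNetV2 backbone and the feature-map generator from __init__ to build(), the standard Keras lazy-build hook. A minimal standalone sketch of that pattern (illustrative class and names, not code from this commit):

    import tensorflow as tf

    class LazyFeatureExtractor(tf.keras.layers.Layer):
      def __init__(self, depth=64, **kwargs):
        super(LazyFeatureExtractor, self).__init__(**kwargs)
        self._depth = depth
        self.conv = None  # created later, mirroring self.mobilenet_v2 = None

      def build(self, input_shape):
        # Runs once, on the first call, when input_shape is finally known.
        self.conv = tf.keras.layers.Conv2D(self._depth, 3, padding='same')
        self.built = True

      def call(self, inputs):
        return self.conv(inputs)

    layer = LazyFeatureExtractor()
    features = layer(tf.zeros([1, 32, 32, 3]))  # build() fires here

Constructing the extractor object stays cheap; the heavy graph is only created when the layer first sees an input.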
research/object_detection/models/ssd_pnasnet_feature_extractor.py (new file, +175)

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""SSDFeatureExtractor for PNASNet features.

Based on PNASNet ImageNet model: https://arxiv.org/abs/1712.00559
"""

import tensorflow as tf

from object_detection.meta_architectures import ssd_meta_arch
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from nets.nasnet import pnasnet

slim = tf.contrib.slim


def pnasnet_large_arg_scope_for_detection(is_batch_norm_training=False):
  """Defines the default arg scope for the PNASNet Large for object detection.

  This provides a small edit to switch batch norm training on and off.

  Args:
    is_batch_norm_training: Boolean indicating whether to train with batch
      norm. Default is False.

  Returns:
    An `arg_scope` to use for the PNASNet Large Model.
  """
  imagenet_scope = pnasnet.pnasnet_large_arg_scope()
  with slim.arg_scope(imagenet_scope):
    with slim.arg_scope([slim.batch_norm],
                        is_training=is_batch_norm_training) as sc:
      return sc


class SSDPNASNetFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
  """SSD Feature Extractor using PNASNet features."""

  def __init__(self,
               is_training,
               depth_multiplier,
               min_depth,
               pad_to_multiple,
               conv_hyperparams_fn,
               reuse_weights=None,
               use_explicit_padding=False,
               use_depthwise=False,
               override_base_feature_extractor_hyperparams=False):
    """PNASNet Feature Extractor for SSD Models.

    Args:
      is_training: whether the network is in training mode.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
        and separable_conv2d ops in the layers that are added on top of the
        base feature extractor.
      reuse_weights: Whether to reuse variables. Default is None.
      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
        inputs so that the output dimensions are the same as if 'SAME' padding
        were used.
      use_depthwise: Whether to use depthwise convolutions.
      override_base_feature_extractor_hyperparams: Whether to override
        hyperparameters of the base feature extractor with the one from
        `conv_hyperparams_fn`.
    """
    super(SSDPNASNetFeatureExtractor, self).__init__(
        is_training=is_training,
        depth_multiplier=depth_multiplier,
        min_depth=min_depth,
        pad_to_multiple=pad_to_multiple,
        conv_hyperparams_fn=conv_hyperparams_fn,
        reuse_weights=reuse_weights,
        use_explicit_padding=use_explicit_padding,
        use_depthwise=use_depthwise,
        override_base_feature_extractor_hyperparams=
        override_base_feature_extractor_hyperparams)

  def preprocess(self, resized_inputs):
    """SSD preprocessing.

    Maps pixel values to the range [-1, 1].

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    feature_map_layout = {
        'from_layer': ['Cell_7', 'Cell_11', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(
        pnasnet_large_arg_scope_for_detection(
            is_batch_norm_training=self._is_training)):
      with slim.arg_scope([slim.conv2d, slim.batch_norm,
                           slim.separable_conv2d],
                          reuse=self._reuse_weights):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = pnasnet.build_pnasnet_large(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              num_classes=None,
              is_training=self._is_training,
              final_endpoint='Cell_11')
    with tf.variable_scope('SSD_feature_maps', reuse=self._reuse_weights):
      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

    return feature_maps.values()

  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
    """Returns a map of variables to load from a foreign checkpoint.

    Note that this overrides the default implementation in
    ssd_meta_arch.SSDFeatureExtractor which does not work for PNASNet
    checkpoints.

    Args:
      feature_extractor_scope: A scope name for the first stage feature
        extractor.

    Returns:
      A dict mapping variable names (to load from a checkpoint) to variables in
      the model graph.
    """
    variables_to_restore = {}
    for variable in tf.global_variables():
      if variable.op.name.startswith(feature_extractor_scope):
        var_name = variable.op.name.replace(feature_extractor_scope + '/', '')
        var_name += '/ExponentialMovingAverage'
        variables_to_restore[var_name] = variable
    return variables_to_restore
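
The preprocess() formula above is a linear map from [0, 255] to [-1, 1]. A quick standalone numeric check:

    import numpy as np

    # (2.0 / 255.0) * x - 1.0 sends 0 -> -1, 127.5 -> 0, and 255 -> 1,
    # so [0, 255] pixel values land in [-1, 1].
    pixels = np.array([0.0, 127.5, 255.0])
    print((2.0 / 255.0) * pixels - 1.0)  # [-1.  0.  1.]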
research/object_detection/models/ssd_pnasnet_feature_extractor_test.py (new file, +87)

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for ssd_pnas_feature_extractor."""
import numpy as np
import tensorflow as tf

from object_detection.models import ssd_feature_extractor_test
from object_detection.models import ssd_pnasnet_feature_extractor

slim = tf.contrib.slim


class SsdPnasNetFeatureExtractorTest(
    ssd_feature_extractor_test.SsdFeatureExtractorTestBase):

  def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
                                is_training=True, use_explicit_padding=False):
    """Constructs a new feature extractor.

    Args:
      depth_multiplier: float depth multiplier for feature extractor
      pad_to_multiple: the nearest multiple to zero pad the input height and
        width dimensions to.
      is_training: whether the network is in training mode.
      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
        inputs so that the output dimensions are the same as if 'SAME' padding
        were used.

    Returns:
      an ssd_meta_arch.SSDFeatureExtractor object.
    """
    min_depth = 32
    return ssd_pnasnet_feature_extractor.SSDPNASNetFeatureExtractor(
        is_training, depth_multiplier, min_depth, pad_to_multiple,
        self.conv_hyperparams_fn,
        use_explicit_padding=use_explicit_padding)

  def test_extract_features_returns_correct_shapes_128(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 8, 8, 2160), (2, 4, 4, 4320),
                                  (2, 2, 2, 512), (2, 1, 1, 256),
                                  (2, 1, 1, 256), (2, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_extract_features_returns_correct_shapes_299(self):
    image_height = 299
    image_width = 299
    depth_multiplier = 1.0
    pad_to_multiple = 1
    expected_feature_map_shape = [(2, 19, 19, 2160), (2, 10, 10, 4320),
                                  (2, 5, 5, 512), (2, 3, 3, 256),
                                  (2, 2, 2, 256), (2, 1, 1, 128)]
    self.check_extract_features_returns_correct_shape(
        2, image_height, image_width, depth_multiplier, pad_to_multiple,
        expected_feature_map_shape)

  def test_preprocess_returns_correct_value_range(self):
    image_height = 128
    image_width = 128
    depth_multiplier = 1
    pad_to_multiple = 1
    test_image = np.random.rand(2, image_height, image_width, 3)
    feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                       pad_to_multiple)
    preprocessed_image = feature_extractor.preprocess(test_image)
    self.assertTrue(np.all(np.less_equal(np.abs(preprocessed_image), 1.0)))


if __name__ == '__main__':
  tf.test.main()
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor.py

@@ -113,6 +113,8 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     VGG style channel mean subtraction as described here:
     https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-mdnge.
+    Note that if the number of channels is not equal to 3, the mean subtraction
+    will be skipped and the original resized_inputs will be returned.

     Args:
       resized_inputs: a [batch, height, width, channels] float tensor
...
@@ -122,8 +124,11 @@ class _SSDResnetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       preprocessed_inputs: a [batch, height, width, channels] float tensor
         representing a batch of images.
     """
-    channel_means = [123.68, 116.779, 103.939]
-    return resized_inputs - [[channel_means]]
+    if resized_inputs.shape.as_list()[3] == 3:
+      channel_means = [123.68, 116.779, 103.939]
+      return resized_inputs - [[channel_means]]
+    else:
+      return resized_inputs

   def _filter_features(self, image_features):
     # TODO(rathodv): Change resnet endpoint to strip scope prefixes instead
...
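
The [[channel_means]] literal works by broadcasting: double-bracketing the three means yields shape (1, 1, 3), which aligns with the trailing channel axis of a [batch, height, width, 3] image. A standalone numpy check:

    import numpy as np

    image = np.ones((2, 4, 4, 3)) * 200.0
    channel_means = [123.68, 116.779, 103.939]
    out = image - [[channel_means]]  # [[channel_means]] has shape (1, 1, 3)
    print(out[0, 0, 0])              # [76.32  83.221 96.061]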
research/object_detection/models/ssd_resnet_v1_fpn_feature_extractor_testbase.py

@@ -82,12 +82,15 @@ class SSDResnetFPNFeatureExtractorTestBase(
     image_width = 128
     depth_multiplier = 1
     pad_to_multiple = 1
-    test_image = np.random.rand(4, image_height, image_width, 3)
+    test_image = tf.constant(np.random.rand(4, image_height, image_width, 3))
     feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                        pad_to_multiple)
     preprocessed_image = feature_extractor.preprocess(test_image)
-    self.assertAllClose(preprocessed_image,
-                        test_image - [[123.68, 116.779, 103.939]])
+    with self.test_session() as sess:
+      test_image_out, preprocessed_image_out = sess.run(
+          [test_image, preprocessed_image])
+      self.assertAllClose(preprocessed_image_out,
+                          test_image_out - [[123.68, 116.779, 103.939]])

   def test_variables_only_created_in_scope(self):
     depth_multiplier = 1
...
@@ -103,5 +106,3 @@ class SSDResnetFPNFeatureExtractorTestBase(
     self.assertTrue(
         variable.name.startswith(self._resnet_scope_name())
         or variable.name.startswith(self._fpn_scope_name()))
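
The rewritten test wraps its input in tf.constant and evaluates through a session because the updated preprocess() inspects resized_inputs.shape.as_list(), which requires a graph tensor; the result is itself a tensor that must be evaluated before comparison. A minimal standalone sketch of that evaluate-then-compare pattern (TF1-style graph mode assumed):

    import numpy as np
    import tensorflow as tf

    test_image = tf.constant(np.random.rand(1, 8, 8, 3))
    preprocessed = test_image - [[123.68, 116.779, 103.939]]
    with tf.Session() as sess:
        image_out, preprocessed_out = sess.run([test_image, preprocessed])
    print(np.allclose(preprocessed_out,
                      image_out - [[123.68, 116.779, 103.939]]))  # True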
research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor.py

@@ -98,6 +98,8 @@ class _SSDResnetPpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     VGG style channel mean subtraction as described here:
     https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-mdnge.
+    Note that if the number of channels is not equal to 3, the mean subtraction
+    will be skipped and the original resized_inputs will be returned.

     Args:
       resized_inputs: a [batch, height, width, channels] float tensor
...
@@ -107,8 +109,11 @@ class _SSDResnetPpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       preprocessed_inputs: a [batch, height, width, channels] float tensor
         representing a batch of images.
     """
-    channel_means = [123.68, 116.779, 103.939]
-    return resized_inputs - [[channel_means]]
+    if resized_inputs.shape.as_list()[3] == 3:
+      channel_means = [123.68, 116.779, 103.939]
+      return resized_inputs - [[channel_means]]
+    else:
+      return resized_inputs

   def extract_features(self, preprocessed_inputs):
     """Extract features from preprocessed inputs.
...
research/object_detection/models/ssd_resnet_v1_ppn_feature_extractor_testbase.py

@@ -15,6 +15,7 @@
 """Tests for ssd resnet v1 feature extractors."""
 import abc
 import numpy as np
+import tensorflow as tf

 from object_detection.models import ssd_feature_extractor_test
...
@@ -64,12 +65,15 @@ class SSDResnetPpnFeatureExtractorTestBase(
     image_width = 128
     depth_multiplier = 1
     pad_to_multiple = 1
-    test_image = np.random.rand(4, image_height, image_width, 3)
+    test_image = tf.constant(np.random.rand(4, image_height, image_width, 3))
     feature_extractor = self._create_feature_extractor(depth_multiplier,
                                                        pad_to_multiple)
     preprocessed_image = feature_extractor.preprocess(test_image)
-    self.assertAllClose(preprocessed_image,
-                        test_image - [[123.68, 116.779, 103.939]])
+    with self.test_session() as sess:
+      test_image_out, preprocessed_image_out = sess.run(
+          [test_image, preprocessed_image])
+      self.assertAllClose(preprocessed_image_out,
+                          test_image_out - [[123.68, 116.779, 103.939]])

   def test_variables_only_created_in_scope(self):
     depth_multiplier = 1
...
research/object_detection/predictors/convolutional_keras_box_predictor.py

@@ -134,26 +134,32 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
           (len(self._prediction_heads[BOX_ENCODINGS]), len(input_shapes)))

     for stack_index, input_shape in enumerate(input_shapes):
-      net = tf.keras.Sequential(name='PreHeadConvolutions_%d' % stack_index)
-      self._shared_nets.append(net)
+      net = []

       # Add additional conv layers before the class predictor.
       features_depth = static_shape.get_depth(input_shape)
       depth = max(min(features_depth, self._max_depth), self._min_depth)
       tf.logging.info(
           'depth of additional conv before box predictor: {}'.format(depth))

       if depth > 0 and self._num_layers_before_predictor > 0:
         for i in range(self._num_layers_before_predictor):
-          net.add(keras.Conv2D(depth, [1, 1],
-                               name='Conv2d_%d_1x1_%d' % (i, depth),
-                               padding='SAME',
-                               **self._conv_hyperparams.params()))
-          net.add(self._conv_hyperparams.build_batch_norm(
+          net.append(keras.Conv2D(
+              depth, [1, 1],
+              name='SharedConvolutions_%d/Conv2d_%d_1x1_%d' % (
+                  stack_index, i, depth),
+              padding='SAME',
+              **self._conv_hyperparams.params()))
+          net.append(self._conv_hyperparams.build_batch_norm(
               training=(self._is_training and not self._freeze_batchnorm),
-              name='Conv2d_%d_1x1_%d_norm' % (i, depth)))
-          net.add(self._conv_hyperparams.build_activation_layer(
-              name='Conv2d_%d_1x1_%d_activation' % (i, depth),
+              name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_norm' % (
+                  stack_index, i, depth)))
+          net.append(self._conv_hyperparams.build_activation_layer(
+              name='SharedConvolutions_%d/Conv2d_%d_1x1_%d_activation' % (
+                  stack_index, i, depth),
           ))
+      # Until certain bugs are fixed in checkpointable lists,
+      # this net must be appended only once it's been filled with layers
+      self._shared_nets.append(net)
     self.built = True

   def _predict(self, image_features):
...
@@ -175,10 +181,11 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
     """
     predictions = collections.defaultdict(list)
-    for (index, image_feature) in enumerate(image_features):
+    for (index, net) in enumerate(image_features):
       # Apply shared conv layers before the head predictors.
-      net = self._shared_nets[index](image_feature)
+      for layer in self._shared_nets[index]:
+        net = layer(net)
       for head_name in self._prediction_heads:
         head_obj = self._prediction_heads[head_name][index]
...
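
The diff above swaps tf.keras.Sequential for a plain Python list of layers, appended to self._shared_nets only after it is fully populated (working around the checkpointable-list bug noted in the comment), and _predict now threads each feature map through the list by hand. A minimal standalone sketch of that pattern (illustrative layers, not code from this commit):

    import tensorflow as tf

    shared_net = [
        tf.keras.layers.Conv2D(32, [1, 1], padding='same', name='conv'),
        tf.keras.layers.BatchNormalization(name='norm'),
        tf.keras.layers.ReLU(name='activation'),
    ]

    def apply_shared_net(image_feature):
      net = image_feature
      for layer in shared_net:  # mirrors `for layer in self._shared_nets[index]`
        net = layer(net)
      return net

    out = apply_shared_net(tf.zeros([1, 16, 16, 8]))
    print(out.shape)  # (1, 16, 16, 32)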
research/object_detection/predictors/convolutional_keras_box_predictor_test.py

@@ -181,8 +181,8 @@ class ConvolutionalKerasBoxPredictorTest(test_case.TestCase):
     self.assertAllEqual(objectness_predictions_shape,
                         [4, expected_num_anchors, 1])
     expected_variable_set = set([
-        'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/bias',
-        'BoxPredictor/PreHeadConvolutions_0/Conv2d_0_1x1_32/kernel',
+        'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/bias',
+        'BoxPredictor/SharedConvolutions_0/Conv2d_0_1x1_32/kernel',
         'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/bias',
         'BoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/kernel',
         'BoxPredictor/ConvolutionalClassHead_0/ClassPredictor/bias',
...
research/object_detection/predictors/heads/class_head.py

@@ -34,16 +34,18 @@ class MaskRCNNClassHead(head.Head):
   https://arxiv.org/abs/1703.06870
   """

-  def __init__(self, is_training, num_classes, fc_hyperparams_fn, use_dropout,
-               dropout_keep_prob):
+  def __init__(self,
+               is_training,
+               num_class_slots,
+               fc_hyperparams_fn,
+               use_dropout,
+               dropout_keep_prob):
     """Constructor.

     Args:
       is_training: Indicates whether the BoxPredictor is in training mode.
-      num_classes: number of classes.  Note that num_classes *does not*
-        include the background category, so if groundtruth labels take values
-        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
-        assigned classification targets can range from {0,... K}).
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
       fc_hyperparams_fn: A function to generate tf-slim arg_scope with
         hyperparameters for fully connected ops.
       use_dropout: Option to use dropout or not.  Note that a single dropout
...
@@ -54,7 +56,7 @@ class MaskRCNNClassHead(head.Head):
     """
     super(MaskRCNNClassHead, self).__init__()
     self._is_training = is_training
-    self._num_classes = num_classes
+    self._num_class_slots = num_class_slots
     self._fc_hyperparams_fn = fc_hyperparams_fn
     self._use_dropout = use_dropout
     self._dropout_keep_prob = dropout_keep_prob
...
@@ -70,7 +72,7 @@ class MaskRCNNClassHead(head.Head):
     Returns:
       class_predictions_with_background: A float tensor of shape
-        [batch_size, 1, num_classes + 1] representing the class predictions for
+        [batch_size, 1, num_class_slots] representing the class predictions for
         the proposals.

     Raises:
...
@@ -91,11 +93,12 @@ class MaskRCNNClassHead(head.Head):
     with slim.arg_scope(self._fc_hyperparams_fn()):
       class_predictions_with_background = slim.fully_connected(
           flattened_roi_pooled_features,
-          self._num_classes + 1,
+          self._num_class_slots,
           activation_fn=None,
           scope='ClassPredictor')
     class_predictions_with_background = tf.reshape(
-        class_predictions_with_background, [-1, 1, self._num_classes + 1])
+        class_predictions_with_background,
+        [-1, 1, self._num_class_slots])
     return class_predictions_with_background
...
@@ -104,7 +107,7 @@ class ConvolutionalClassHead(head.Head):
   def __init__(self,
                is_training,
-               num_classes,
+               num_class_slots,
                use_dropout,
                dropout_keep_prob,
                kernel_size,
...
@@ -115,7 +118,8 @@ class ConvolutionalClassHead(head.Head):
     Args:
       is_training: Indicates whether the BoxPredictor is in training mode.
-      num_classes: Number of classes.
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
       use_dropout: Option to use dropout or not.  Note that a single dropout
         op is applied here prior to both box and class predictions, which stands
         in contrast to the ConvolutionalBoxPredictor below.
...
@@ -137,7 +141,7 @@ class ConvolutionalClassHead(head.Head):
     """
     super(ConvolutionalClassHead, self).__init__()
     self._is_training = is_training
-    self._num_classes = num_classes
+    self._num_class_slots = num_class_slots
     self._use_dropout = use_dropout
     self._dropout_keep_prob = dropout_keep_prob
     self._kernel_size = kernel_size
...
@@ -156,12 +160,10 @@ class ConvolutionalClassHead(head.Head):
     Returns:
       class_predictions_with_background: A float tensors of shape
-        [batch_size, num_anchors, num_classes + 1] representing the class
+        [batch_size, num_anchors, num_class_slots] representing the class
         predictions for the proposals.
     """
     net = features
-    # Add a slot for the background class.
-    num_class_slots = self._num_classes + 1
     if self._use_dropout:
       net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
     if self._use_depthwise:
...
@@ -171,7 +173,7 @@ class ConvolutionalClassHead(head.Head):
           rate=1, scope='ClassPredictor_depthwise')
       class_predictions_with_background = slim.conv2d(
           class_predictions_with_background,
-          num_predictions_per_location * num_class_slots, [1, 1],
+          num_predictions_per_location * self._num_class_slots, [1, 1],
           activation_fn=None,
           normalizer_fn=None,
           normalizer_params=None,
...
@@ -179,7 +181,7 @@ class ConvolutionalClassHead(head.Head):
     else:
       class_predictions_with_background = slim.conv2d(
           net,
-          num_predictions_per_location * num_class_slots,
+          num_predictions_per_location * self._num_class_slots,
           [self._kernel_size, self._kernel_size],
           activation_fn=None,
           normalizer_fn=None,
...
@@ -194,7 +196,8 @@ class ConvolutionalClassHead(head.Head):
     if batch_size is None:
       batch_size = tf.shape(features)[0]
     class_predictions_with_background = tf.reshape(
-        class_predictions_with_background, [batch_size, -1, num_class_slots])
+        class_predictions_with_background,
+        [batch_size, -1, self._num_class_slots])
     return class_predictions_with_background
...
@@ -208,7 +211,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
   """

   def __init__(self,
-               num_classes,
+               num_class_slots,
                kernel_size=3,
                class_prediction_bias_init=0.0,
                use_dropout=False,
...
@@ -218,10 +221,8 @@ class WeightSharedConvolutionalClassHead(head.Head):
     """Constructor.

     Args:
-      num_classes: number of classes.  Note that num_classes *does not*
-        include the background category, so if groundtruth labels take values
-        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
-        assigned classification targets can range from {0,... K}).
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
       kernel_size: Size of final convolution kernel.
       class_prediction_bias_init: constant value to initialize bias of the last
         conv2d layer before class prediction.
...
@@ -233,7 +234,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
       as inputs and returns tensors).
     """
     super(WeightSharedConvolutionalClassHead, self).__init__()
-    self._num_classes = num_classes
+    self._num_class_slots = num_class_slots
     self._kernel_size = kernel_size
     self._class_prediction_bias_init = class_prediction_bias_init
     self._use_dropout = use_dropout
...
@@ -252,12 +253,10 @@ class WeightSharedConvolutionalClassHead(head.Head):
     Returns:
       class_predictions_with_background: A tensor of shape
-        [batch_size, num_anchors, num_classes + 1] representing the class
+        [batch_size, num_anchors, num_class_slots] representing the class
         predictions for the proposals.
     """
     class_predictions_net = features
-    num_class_slots = self._num_classes + 1
-    # Add a slot for the background class.
     if self._use_dropout:
       class_predictions_net = slim.dropout(
           class_predictions_net, keep_prob=self._dropout_keep_prob)
...
@@ -267,7 +266,7 @@ class WeightSharedConvolutionalClassHead(head.Head):
       conv_op = slim.conv2d
     class_predictions_with_background = conv_op(
         class_predictions_net,
-        num_predictions_per_location * num_class_slots,
+        num_predictions_per_location * self._num_class_slots,
         [self._kernel_size, self._kernel_size],
         activation_fn=None, stride=1, padding='SAME',
         normalizer_fn=None,
...
@@ -280,5 +279,6 @@ class WeightSharedConvolutionalClassHead(head.Head):
       class_predictions_with_background = self._score_converter_fn(
           class_predictions_with_background)
     class_predictions_with_background = tf.reshape(
-        class_predictions_with_background, [batch_size, -1, num_class_slots])
+        class_predictions_with_background,
+        [batch_size, -1, self._num_class_slots])
     return class_predictions_with_background
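
The refactor above replaces the heads' hard-coded background slot (num_classes + 1) with an explicit num_class_slots argument. A small standalone arithmetic check of the shapes the updated tests expect:

    num_classes = 20
    old_num_class_slots = num_classes + 1  # background slot added internally
    new_num_class_slots = 20               # passed explicitly; may or may not
                                           # include background
    num_anchors = 17 * 19                  # 323, from a 17x19 feature map with
                                           # one prediction per location
    print([64, num_anchors, old_num_class_slots])  # [64, 323, 21] (old tests)
    print([64, num_anchors, new_num_class_slots])  # [64, 323, 20] (new tests)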
research/object_detection/predictors/heads/class_head_test.py

@@ -46,7 +46,7 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
   def test_prediction_size(self):
     class_prediction_head = class_head.MaskRCNNClassHead(
         is_training=False,
-        num_classes=20,
+        num_class_slots=20,
         fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
         use_dropout=True,
         dropout_keep_prob=0.5)
...
@@ -54,7 +54,7 @@ class MaskRCNNClassHeadTest(test_case.TestCase):
         [64, 7, 7, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
     prediction = class_prediction_head.predict(
         features=roi_pooled_features, num_predictions_per_location=1)
-    self.assertAllEqual([64, 1, 21], prediction.get_shape().as_list())
+    self.assertAllEqual([64, 1, 20], prediction.get_shape().as_list())


 class ConvolutionalClassPredictorTest(test_case.TestCase):
...
@@ -80,7 +80,7 @@ class ConvolutionalClassPredictorTest(test_case.TestCase):
   def test_prediction_size(self):
     class_prediction_head = class_head.ConvolutionalClassHead(
         is_training=True,
-        num_classes=20,
+        num_class_slots=20,
         use_dropout=True,
         dropout_keep_prob=0.5,
         kernel_size=3)
...
@@ -89,7 +89,7 @@ class ConvolutionalClassPredictorTest(test_case.TestCase):
     class_predictions = class_prediction_head.predict(
         features=image_feature,
         num_predictions_per_location=1)
-    self.assertAllEqual([64, 323, 21],
+    self.assertAllEqual([64, 323, 20],
                         class_predictions.get_shape().as_list())
...
@@ -115,13 +115,13 @@ class WeightSharedConvolutionalClassPredictorTest(test_case.TestCase):
   def test_prediction_size(self):
     class_prediction_head = (
-        class_head.WeightSharedConvolutionalClassHead(num_classes=20))
+        class_head.WeightSharedConvolutionalClassHead(num_class_slots=20))
     image_feature = tf.random_uniform(
         [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
     class_predictions = class_prediction_head.predict(
         features=image_feature,
         num_predictions_per_location=1)
-    self.assertAllEqual([64, 323, 21],
+    self.assertAllEqual([64, 323, 20],
                         class_predictions.get_shape().as_list())


 if __name__ == '__main__':
...
research/object_detection/predictors/heads/keras_box_head.py

@@ -91,7 +91,7 @@ class ConvolutionalBoxHead(head.KerasHead):
           tf.keras.layers.Conv2D(
               num_predictions_per_location * self._box_code_size, [1, 1],
               name='BoxEncodingPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))
     else:
       self._box_encoder_layers.append(
           tf.keras.layers.Conv2D(
...
@@ -99,7 +99,7 @@ class ConvolutionalBoxHead(head.KerasHead):
               [self._kernel_size, self._kernel_size],
               padding='SAME',
               name='BoxEncodingPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))

   def _predict(self, features):
     """Predicts boxes.
...
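
Both call sites now pass use_bias=True instead of activation=None into conv_hyperparams.params(). A hypothetical sketch of this override pattern (the real params() lives in the repo's hyperparams builder; this stand-in only illustrates the kwargs merge):

    # Stand-in for conv_hyperparams.params(); names and defaults are
    # illustrative, not the repo's actual configuration.
    def params(**overrides):
      base = {'activation': 'relu', 'use_bias': False, 'padding': 'same'}
      base.update(overrides)  # e.g. use_bias=True for the predictor layer
      return base

    print(params(use_bias=True))
    # {'activation': 'relu', 'use_bias': True, 'padding': 'same'}

The returned dict is then splatted into the tf.keras.layers.Conv2D constructor via **.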
research/object_detection/predictors/heads/keras_class_head.py

@@ -29,7 +29,7 @@ class ConvolutionalClassHead(head.KerasHead):
   def __init__(self,
                is_training,
-               num_classes,
+               num_class_slots,
                use_dropout,
                dropout_keep_prob,
                kernel_size,
...
@@ -43,7 +43,8 @@ class ConvolutionalClassHead(head.KerasHead):
     Args:
       is_training: Indicates whether the BoxPredictor is in training mode.
-      num_classes: Number of classes.
+      num_class_slots: number of class slots. Note that num_class_slots may or
+        may not include an implicit background category.
       use_dropout: Option to use dropout or not.  Note that a single dropout
         op is applied here prior to both box and class predictions, which stands
         in contrast to the ConvolutionalBoxPredictor below.
...
@@ -73,13 +74,12 @@ class ConvolutionalClassHead(head.KerasHead):
     """
     super(ConvolutionalClassHead, self).__init__(name=name)
     self._is_training = is_training
-    self._num_classes = num_classes
     self._use_dropout = use_dropout
     self._dropout_keep_prob = dropout_keep_prob
     self._kernel_size = kernel_size
     self._class_prediction_bias_init = class_prediction_bias_init
     self._use_depthwise = use_depthwise
-    self._num_class_slots = self._num_classes + 1
+    self._num_class_slots = num_class_slots
     self._class_predictor_layers = []
...
@@ -110,7 +110,7 @@ class ConvolutionalClassHead(head.KerasHead):
           tf.keras.layers.Conv2D(
               num_predictions_per_location * self._num_class_slots, [1, 1],
               name='ClassPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))
     else:
       self._class_predictor_layers.append(
           tf.keras.layers.Conv2D(
...
@@ -120,7 +120,7 @@ class ConvolutionalClassHead(head.KerasHead):
               name='ClassPredictor',
               bias_initializer=tf.constant_initializer(
                   self._class_prediction_bias_init),
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))

   def _predict(self, features):
     """Predicts boxes.
...
@@ -131,7 +131,7 @@ class ConvolutionalClassHead(head.KerasHead):
     Returns:
       class_predictions_with_background: A float tensor of shape
-        [batch_size, num_anchors, num_classes + 1] representing the class
+        [batch_size, num_anchors, num_class_slots] representing the class
         predictions for the proposals.
     """
     # Add a slot for the background class.
...
research/object_detection/predictors/heads/keras_class_head_test.py

@@ -45,7 +45,7 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
     conv_hyperparams = self._build_conv_hyperparams()
     class_prediction_head = keras_class_head.ConvolutionalClassHead(
         is_training=True,
-        num_classes=20,
+        num_class_slots=20,
         use_dropout=True,
         dropout_keep_prob=0.5,
         kernel_size=3,
...
@@ -56,7 +56,7 @@ class ConvolutionalKerasClassPredictorTest(test_case.TestCase):
     image_feature = tf.random_uniform(
         [64, 17, 19, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
     class_predictions = class_prediction_head(image_feature,)
-    self.assertAllEqual([64, 323, 21],
+    self.assertAllEqual([64, 323, 20],
                         class_predictions.get_shape().as_list())

   # TODO(kaftan): Remove conditional after CMLE moves to TF 1.10
...
research/object_detection/predictors/heads/keras_mask_head.py

@@ -124,7 +124,7 @@ class ConvolutionalMaskHead(head.KerasHead):
           tf.keras.layers.Conv2D(
               num_predictions_per_location * num_mask_channels, [1, 1],
               name='MaskPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))
     else:
       self._mask_predictor_layers.append(
           tf.keras.layers.Conv2D(
...
@@ -132,7 +132,7 @@ class ConvolutionalMaskHead(head.KerasHead):
               [self._kernel_size, self._kernel_size],
               padding='SAME',
               name='MaskPredictor',
-              **conv_hyperparams.params(activation=None)))
+              **conv_hyperparams.params(use_bias=True)))

   def _predict(self, features):
     """Predicts boxes.
...
research/object_detection/predictors/heads/mask_head.py

@@ -23,6 +23,7 @@ import math
 import tensorflow as tf

 from object_detection.predictors.heads import head
+from object_detection.utils import ops

 slim = tf.contrib.slim
...
@@ -41,7 +42,8 @@ class MaskRCNNMaskHead(head.Head):
                mask_width=14,
                mask_prediction_num_conv_layers=2,
                mask_prediction_conv_depth=256,
-               masks_are_class_agnostic=False):
+               masks_are_class_agnostic=False,
+               convolve_then_upsample=False):
     """Constructor.

     Args:
...
@@ -62,6 +64,10 @@ class MaskRCNNMaskHead(head.Head):
         image features.
       masks_are_class_agnostic: Boolean determining if the mask-head is
         class-agnostic or not.
+      convolve_then_upsample: Whether to apply convolutions on mask features
+        before upsampling using nearest neighbor resizing. Otherwise, mask
+        features are resized to [`mask_height`, `mask_width`] using bilinear
+        resizing before applying convolutions.

     Raises:
       ValueError: conv_hyperparams_fn is None.
...
@@ -74,6 +80,7 @@ class MaskRCNNMaskHead(head.Head):
     self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers
     self._mask_prediction_conv_depth = mask_prediction_conv_depth
     self._masks_are_class_agnostic = masks_are_class_agnostic
+    self._convolve_then_upsample = convolve_then_upsample
     if conv_hyperparams_fn is None:
       raise ValueError('conv_hyperparams_fn is None.')
...
@@ -135,17 +142,30 @@ class MaskRCNNMaskHead(head.Head):
       num_conv_channels = self._get_mask_predictor_conv_depth(
           num_feature_channels, self._num_classes)
     with slim.arg_scope(self._conv_hyperparams_fn()):
-      upsampled_features = tf.image.resize_bilinear(
-          features, [self._mask_height, self._mask_width],
-          align_corners=True)
+      if not self._convolve_then_upsample:
+        features = tf.image.resize_bilinear(
+            features, [self._mask_height, self._mask_width],
+            align_corners=True)
       for _ in range(self._mask_prediction_num_conv_layers - 1):
-        upsampled_features = slim.conv2d(
-            upsampled_features,
+        features = slim.conv2d(
+            features,
             num_outputs=num_conv_channels,
             kernel_size=[3, 3])
+      if self._convolve_then_upsample:
+        # Replace Transposed Convolution with a Nearest Neighbor upsampling
+        # step followed by 3x3 convolution.
+        height_scale = self._mask_height / features.shape[1].value
+        width_scale = self._mask_width / features.shape[2].value
+        features = ops.nearest_neighbor_upsampling(
+            features, height_scale=height_scale, width_scale=width_scale)
+        features = slim.conv2d(
+            features,
+            num_outputs=num_conv_channels,
+            kernel_size=[3, 3])
+
       num_masks = 1 if self._masks_are_class_agnostic else self._num_classes
       mask_predictions = slim.conv2d(
-          upsampled_features,
+          features,
           num_outputs=num_masks,
          activation_fn=None,
          normalizer_fn=None,
...
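
With convolve_then_upsample enabled, the scales are just the ratio of mask size to feature size. A worked standalone check using the sizes from the new test below (14x14 RoI features, 28x28 masks):

    mask_height, mask_width = 28, 28
    feature_height, feature_width = 14, 14        # RoI-pooled feature size
    height_scale = mask_height / feature_height   # 2.0
    width_scale = mask_width / feature_width      # 2.0
    print(height_scale, width_scale)              # nearest-neighbor scale of 2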
research/object_detection/predictors/heads/mask_head_test.py

@@ -58,6 +58,22 @@ class MaskRCNNMaskHeadTest(test_case.TestCase):
         features=roi_pooled_features, num_predictions_per_location=1)
     self.assertAllEqual([64, 1, 20, 14, 14], prediction.get_shape().as_list())

+  def test_prediction_size_with_convolve_then_upsample(self):
+    mask_prediction_head = mask_head.MaskRCNNMaskHead(
+        num_classes=20,
+        conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
+        mask_height=28,
+        mask_width=28,
+        mask_prediction_num_conv_layers=2,
+        mask_prediction_conv_depth=256,
+        masks_are_class_agnostic=True,
+        convolve_then_upsample=True)
+    roi_pooled_features = tf.random_uniform(
+        [64, 14, 14, 1024], minval=-10.0, maxval=10.0, dtype=tf.float32)
+    prediction = mask_prediction_head.predict(
+        features=roi_pooled_features, num_predictions_per_location=1)
+    self.assertAllEqual([64, 1, 1, 28, 28], prediction.get_shape().as_list())
+

 class ConvolutionalMaskPredictorTest(test_case.TestCase):
...
research/object_detection/protos/box_predictor.proto

@@ -138,6 +138,7 @@ message WeightSharedConvolutionalBoxPredictor {
 // TODO(alirezafathi): Refactor the proto file to be able to configure mask rcnn
 // head easily.
+// Next id: 15
 message MaskRCNNBoxPredictor {
   // Hyperparameters for fully connected ops used in the box predictor.
   optional Hyperparams fc_hyperparams = 1;
...
@@ -178,6 +179,12 @@ message MaskRCNNBoxPredictor {
   // Whether to use one box for all classes rather than a different box for each
   // class.
   optional bool share_box_across_classes = 13 [default = false];
+
+  // Whether to apply convolutions on mask features before upsampling using
+  // nearest neighbor resizing.
+  // By default, mask features are resized to [`mask_height`, `mask_width`]
+  // before applying convolutions and predicting masks.
+  optional bool convolve_then_upsample_masks = 14 [default = false];
 }

 message RfcnBoxPredictor {
...
research/object_detection/protos/faster_rcnn.proto

@@ -164,6 +164,10 @@ message FasterRcnn {
   // Whether the masks present in groundtruth should be resized in the model to
   // match the image size.
   optional bool resize_masks = 36 [default = true];
+
+  // If True, uses implementation of ops with static shape guarantees when
+  // running evaluation (specifically not is_training if False).
+  optional bool use_static_shapes_for_eval = 37 [default = false];
 }
...
research/object_detection/protos/preprocessor.proto

@@ -155,6 +155,9 @@ message RandomCropImage {
   // value, it is removed from the new image.
   optional float overlap_thresh = 6 [default = 0.3];

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 8 [default = true];
+
   // Probability of keeping the original image.
   optional float random_coef = 7 [default = 0.0];
 }
...
@@ -194,6 +197,9 @@ message RandomCropPadImage {
   // value, it is removed from the new image.
   optional float overlap_thresh = 6 [default = 0.3];

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 11 [default = true];
+
   // Probability of keeping the original image during the crop operation.
   optional float random_coef = 7 [default = 0.0];
...
@@ -217,6 +223,9 @@ message RandomCropToAspectRatio {
   // ratio between a cropped bounding box and the original is less than this
   // value, it is removed from the new image.
   optional float overlap_thresh = 2 [default = 0.3];
+
+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 3 [default = true];
 }

 // Randomly adds black square patches to an image.
...
@@ -285,6 +294,9 @@ message SSDRandomCropOperation {
   // Cropped box area ratio must be above this threhold to be kept.
   optional float overlap_thresh = 6;

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 8 [default = true];
+
   // Probability a crop operation is skipped.
   optional float random_coef = 7;
 }
...
@@ -315,6 +327,9 @@ message SSDRandomCropPadOperation {
   // Cropped box area ratio must be above this threhold to be kept.
   optional float overlap_thresh = 6;

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 13 [default = true];
+
   // Probability a crop operation is skipped.
   optional float random_coef = 7;
...
@@ -353,6 +368,9 @@ message SSDRandomCropFixedAspectRatioOperation {
   // Cropped box area ratio must be above this threhold to be kept.
   optional float overlap_thresh = 6;

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 8 [default = true];
+
   // Probability a crop operation is skipped.
   optional float random_coef = 7;
 }
...
@@ -387,6 +405,9 @@ message SSDRandomCropPadFixedAspectRatioOperation {
   // Cropped box area ratio must be above this threhold to be kept.
   optional float overlap_thresh = 6;

+  // Whether to clip the boxes to the cropped image.
+  optional bool clip_boxes = 8 [default = true];
+
   // Probability a crop operation is skipped.
   optional float random_coef = 7;
 }
...
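
The new clip_boxes fields can be set through the protoc-generated Python classes. A hedged sketch, assuming the standard generated module object_detection/protos/preprocessor_pb2 and its PreprocessingStep wrapper message:

    from object_detection.protos import preprocessor_pb2

    step = preprocessor_pb2.PreprocessingStep()
    step.random_crop_image.overlap_thresh = 0.3
    step.random_crop_image.clip_boxes = False  # new field 8; defaults to true
    step.random_crop_image.random_coef = 0.0
    print(step)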