Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
c8e6faf7
Commit
c8e6faf7
authored
Mar 01, 2022
by
A. Unique TensorFlower
Browse files
Internal change
PiperOrigin-RevId: 431756117
parent
13a5e4fb
Changes
235
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
4787 additions
and
0 deletions
+4787
-0
official/vision/modeling/decoders/factory_test.py
official/vision/modeling/decoders/factory_test.py
+159
-0
official/vision/modeling/decoders/fpn.py
official/vision/modeling/decoders/fpn.py
+246
-0
official/vision/modeling/decoders/fpn_test.py
official/vision/modeling/decoders/fpn_test.py
+117
-0
official/vision/modeling/decoders/nasfpn.py
official/vision/modeling/decoders/nasfpn.py
+368
-0
official/vision/modeling/decoders/nasfpn_test.py
official/vision/modeling/decoders/nasfpn_test.py
+59
-0
official/vision/modeling/factory.py
official/vision/modeling/factory.py
+387
-0
official/vision/modeling/factory_3d.py
official/vision/modeling/factory_3d.py
+103
-0
official/vision/modeling/factory_test.py
official/vision/modeling/factory_test.py
+132
-0
official/vision/modeling/heads/__init__.py
official/vision/modeling/heads/__init__.py
+22
-0
official/vision/modeling/heads/dense_prediction_heads.py
official/vision/modeling/heads/dense_prediction_heads.py
+517
-0
official/vision/modeling/heads/dense_prediction_heads_test.py
...cial/vision/modeling/heads/dense_prediction_heads_test.py
+148
-0
official/vision/modeling/heads/instance_heads.py
official/vision/modeling/heads/instance_heads.py
+444
-0
official/vision/modeling/heads/instance_heads_test.py
official/vision/modeling/heads/instance_heads_test.py
+135
-0
official/vision/modeling/heads/segmentation_heads.py
official/vision/modeling/heads/segmentation_heads.py
+441
-0
official/vision/modeling/heads/segmentation_heads_test.py
official/vision/modeling/heads/segmentation_heads_test.py
+107
-0
official/vision/modeling/layers/__init__.py
official/vision/modeling/layers/__init__.py
+44
-0
official/vision/modeling/layers/box_sampler.py
official/vision/modeling/layers/box_sampler.py
+93
-0
official/vision/modeling/layers/deeplab.py
official/vision/modeling/layers/deeplab.py
+211
-0
official/vision/modeling/layers/deeplab_test.py
official/vision/modeling/layers/deeplab_test.py
+53
-0
official/vision/modeling/layers/detection_generator.py
official/vision/modeling/layers/detection_generator.py
+1001
-0
No files found.
official/vision/modeling/decoders/factory_test.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for decoder factory functions."""
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
tensorflow.python.distribute
import
combinations
from
official.vision
import
configs
from
official.vision.configs
import
decoders
as
decoders_cfg
from
official.vision.modeling
import
decoders
from
official.vision.modeling.decoders
import
factory
class FactoryTest(tf.test.TestCase, parameterized.TestCase):
  """Checks that factory-built decoders match directly constructed ones."""

  def _make_input_specs(self, min_level, max_level):
    """Returns {level: TensorShape} specs mimicking a backbone's outputs."""
    return {
        str(lvl): tf.TensorShape([1, 128 // (2**lvl), 128 // (2**lvl), 3])
        for lvl in range(min_level, max_level)
    }

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          use_separable_conv=[True, False]))
  def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
    """Test creation of FPN decoder."""
    min_level, max_level = 3, 7
    input_specs = self._make_input_specs(min_level, max_level)

    # Decoder built directly from the modeling library.
    direct = decoders.FPN(
        input_specs=input_specs,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    # The same decoder built through the factory from a model config.
    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='fpn',
        fpn=decoders_cfg.FPN(
            num_filters=num_filters,
            use_separable_conv=use_separable_conv))
    from_factory = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    self.assertEqual(direct.get_config(), from_factory.get_config())

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          num_repeats=[3, 5],
          use_separable_conv=[True, False]))
  def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
                                   use_separable_conv):
    """Test creation of NASFPN decoder."""
    min_level, max_level = 3, 7
    input_specs = self._make_input_specs(min_level, max_level)

    direct = decoders.NASFPN(
        input_specs=input_specs,
        num_filters=num_filters,
        num_repeats=num_repeats,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='nasfpn',
        nasfpn=decoders_cfg.NASFPN(
            num_filters=num_filters,
            num_repeats=num_repeats,
            use_separable_conv=use_separable_conv))
    from_factory = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    self.assertEqual(direct.get_config(), from_factory.get_config())

  @combinations.generate(
      combinations.combine(
          level=[3, 4],
          dilation_rates=[[6, 12, 18], [6, 12]],
          num_filters=[128, 256]))
  def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
    """Test creation of ASPP decoder."""
    input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}

    direct = decoders.ASPP(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        use_sync_bn=True)

    model_config = configs.semantic_segmentation.SemanticSegmentationModel()
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='aspp',
        aspp=decoders_cfg.ASPP(
            level=level,
            dilation_rates=dilation_rates,
            num_filters=num_filters))
    from_factory = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    direct_config = direct.get_config()
    factory_config = from_factory.get_config()
    # Due to calling `super().get_config()` in aspp layer, everything but the
    # name of the two layer instances are the same, so we force equal names so
    # the comparison does not give a false alarm.
    factory_config['name'] = direct_config['name']
    self.assertEqual(direct_config, factory_config)

  def test_identity_decoder_creation(self):
    """Test creation of identity decoder."""
    model_config = configs.retinanet.RetinaNet()
    model_config.num_classes = 2
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='identity', identity=decoders_cfg.Identity())
    # The identity decoder is a no-op, so the factory returns None.
    self.assertIsNone(
        factory.build_decoder(input_specs=None, model_config=model_config))
if __name__ == '__main__':
  # Run the decoder factory tests via the TensorFlow test runner.
  tf.test.main()
official/vision/modeling/decoders/fpn.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of Feature Pyramid Networks (FPN)."""
from
typing
import
Any
,
Mapping
,
Optional
# Import libraries
from
absl
import
logging
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.modeling
import
tf_utils
from
official.vision.modeling.decoders
import
factory
from
official.vision.ops
import
spatial_transform_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class FPN(tf.keras.Model):
  """Creates a Feature Pyramid Network (FPN).

  This implements the paper:
  Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan, and
  Serge Belongie.
  Feature Pyramid Networks for Object Detection.
  (https://arxiv.org/pdf/1612.03144)
  """

  def __init__(
      self,
      input_specs: Mapping[str, tf.TensorShape],
      min_level: int = 3,
      max_level: int = 7,
      num_filters: int = 256,
      fusion_type: str = 'sum',
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a Feature Pyramid Network (FPN).

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      num_filters: An `int` number of filters in FPN layers.
      fusion_type: A `str` of `sum` or `concat`. Whether performing sum or
        concat for feature fusion.
      use_separable_conv: A `bool`. If True use separable convolution for
        convolution in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    # Kept verbatim so get_config()/from_config() round-trips the constructor.
    self._config_dict = {
        'input_specs': input_specs,
        'min_level': min_level,
        'max_level': max_level,
        'num_filters': num_filters,
        'fusion_type': fusion_type,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    if use_separable_conv:
      conv2d = tf.keras.layers.SeparableConv2D
    else:
      conv2d = tf.keras.layers.Conv2D
    if use_sync_bn:
      norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm = tf.keras.layers.BatchNormalization
    activation_fn = tf.keras.layers.Activation(
        tf_utils.get_activation(activation))

    # Build input feature pyramid.
    if tf.keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Get input feature pyramid from backbone.
    logging.info('FPN input_specs: %s', input_specs)
    inputs = self._build_input_pyramid(input_specs, min_level)
    # BUGFIX: compare levels numerically. The previous
    # `int(max(inputs.keys()))` took a lexicographic max over string keys,
    # which is wrong once a backbone exposes a two-digit level
    # (max('10', '2') == '2').
    backbone_max_level = min(max(int(k) for k in inputs.keys()), max_level)

    # Build lateral connections.
    feats_lateral = {}
    for level in range(min_level, backbone_max_level + 1):
      feats_lateral[str(level)] = conv2d(
          filters=num_filters,
          kernel_size=1,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(inputs[str(level)])

    # Build top-down path: upsample the coarser level by 2x and fuse with the
    # lateral connection at the current level.
    feats = {str(backbone_max_level): feats_lateral[str(backbone_max_level)]}
    for level in range(backbone_max_level - 1, min_level - 1, -1):
      feat_a = spatial_transform_ops.nearest_upsampling(
          feats[str(level + 1)], 2)
      feat_b = feats_lateral[str(level)]
      if fusion_type == 'sum':
        feats[str(level)] = feat_a + feat_b
      elif fusion_type == 'concat':
        feats[str(level)] = tf.concat([feat_a, feat_b], axis=-1)
      else:
        raise ValueError('Fusion type {} not supported.'.format(fusion_type))

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build post-hoc 3x3 convolution kernel.
    for level in range(min_level, backbone_max_level + 1):
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=1,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(feats[str(level)])

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build coarser FPN levels introduced for RetinaNet: strided 3x3 convs
    # from the previous level, with an activation between successive convs.
    for level in range(backbone_max_level + 1, max_level + 1):
      feats_in = feats[str(level - 1)]
      if level > backbone_max_level + 1:
        feats_in = activation_fn(feats_in)
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=2,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(feats_in)

    # Apply batch norm layers.
    for level in range(min_level, max_level + 1):
      feats[str(level)] = norm(
          axis=bn_axis,
          momentum=norm_momentum,
          epsilon=norm_epsilon)(feats[str(level)])

    self._output_specs = {
        str(level): feats[str(level)].get_shape()
        for level in range(min_level, max_level + 1)
    }

    super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
    """Creates `tf.keras.Input` placeholders for every backbone level."""
    assert isinstance(input_specs, dict)
    # BUGFIX: compare levels as ints; the previous string comparison
    # (`min(input_specs.keys()) > str(min_level)`) is lexicographic and
    # misbehaves for multi-digit levels (e.g. '10' < '2').
    if min(int(k) for k in input_specs.keys()) > min_level:
      raise ValueError(
          'Backbone min level should be less or equal to FPN min level')

    inputs = {}
    for level, spec in input_specs.items():
      # Drop the batch dimension; Keras adds it back.
      inputs[level] = tf.keras.Input(shape=spec[1:])
    return inputs

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
@factory.register_decoder_builder('fpn')
def build_fpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds FPN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the FPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `fpn`.
  """
  decoder_type = model_config.decoder.type
  fpn_cfg = model_config.decoder.get()
  if decoder_type != 'fpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `fpn`.')

  norm_cfg = model_config.norm_activation
  decoder_kwargs = dict(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=fpn_cfg.num_filters,
      fusion_type=fpn_cfg.fusion_type,
      use_separable_conv=fpn_cfg.use_separable_conv,
      activation=norm_cfg.activation,
      use_sync_bn=norm_cfg.use_sync_bn,
      norm_momentum=norm_cfg.norm_momentum,
      norm_epsilon=norm_cfg.norm_epsilon,
      kernel_regularizer=l2_regularizer)
  return FPN(**decoder_kwargs)
official/vision/modeling/decoders/fpn_test.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for FPN."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.modeling.backbones
import
mobilenet
from
official.vision.modeling.backbones
import
resnet
from
official.vision.modeling.decoders
import
fpn
class FPNTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for the FPN decoder."""

  def _assert_pyramid_shapes(self, feats, input_size, min_level, max_level):
    """Asserts every pyramid level exists with the expected output shape."""
    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      expected = [1, input_size // 2**level, input_size // 2**level, 256]
      self.assertAllEqual(expected, feats[str(level)].shape.as_list())

  @parameterized.parameters(
      (256, 3, 7, False, 'sum'),
      (256, 3, 7, True, 'concat'),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv, fusion_type):
    """Test creation of FPN."""
    tf.keras.backend.set_image_data_format('channels_last')

    images = tf.keras.Input(
        shape=(input_size, input_size, 3), batch_size=1)
    backbone = resnet.ResNet(model_id=50)
    decoder = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        fusion_type=fusion_type,
        use_separable_conv=use_separable_conv)

    feats = decoder(backbone(images))
    self._assert_pyramid_shapes(feats, input_size, min_level, max_level)

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation_with_mobilenet(self, input_size, min_level,
                                           max_level, use_separable_conv):
    """Test creation of FPN with mobilenet backbone."""
    tf.keras.backend.set_image_data_format('channels_last')

    images = tf.keras.Input(
        shape=(input_size, input_size, 3), batch_size=1)
    backbone = mobilenet.MobileNet(model_id='MobileNetV2')
    decoder = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        use_separable_conv=use_separable_conv)

    feats = decoder(backbone(images))
    self._assert_pyramid_shapes(feats, input_size, min_level, max_level)

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        input_specs=resnet.ResNet(model_id=50).output_specs,
        min_level=3,
        max_level=7,
        num_filters=256,
        fusion_type='sum',
        use_separable_conv=False,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    original = fpn.FPN(**kwargs)
    self.assertEqual(original.get_config(), dict(kwargs))

    # Create another network object from the first object's config.
    restored = fpn.FPN.from_config(original.get_config())

    # Validate that the config can be forced to JSON.
    _ = restored.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(original.get_config(), restored.get_config())
if __name__ == '__main__':
  # Run the FPN tests via the TensorFlow test runner.
  tf.test.main()
official/vision/modeling/decoders/nasfpn.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of NAS-FPN."""
from
typing
import
Any
,
List
,
Mapping
,
Optional
,
Tuple
# Import libraries
from
absl
import
logging
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.modeling
import
tf_utils
from
official.vision.modeling.decoders
import
factory
from
official.vision.ops
import
spatial_transform_ops
# The fixed NAS-FPN architecture discovered by NAS.
# Each element represents a specification of a building block:
#   (block_level, combine_fn, (input_offset0, input_offset1), is_output).
# The input offsets index into the running list of feature maps: with the
# default min/max levels (3-7) offsets 0-4 are the initial P3-P7 features,
# and larger offsets refer to blocks created earlier in this list.
NASFPN_BLOCK_SPECS = [
    (4, 'attention', (1, 3), False),
    (4, 'sum', (1, 5), False),
    (3, 'sum', (0, 6), True),
    (4, 'sum', (6, 7), True),
    (5, 'attention', (7, 8), True),
    (7, 'attention', (6, 9), True),
    (6, 'attention', (9, 10), True),
]
class BlockSpec:
  """Configuration of a single NAS-FPN building block.

  Attributes mirror one entry of the block-spec tuples: the output pyramid
  level, the merge function name, the offsets of the two input feature maps,
  and whether the block is a pyramid output.
  """

  def __init__(self, level: int, combine_fn: str,
               input_offsets: Tuple[int, int], is_output: bool):
    self.level, self.combine_fn = level, combine_fn
    self.input_offsets, self.is_output = input_offsets, is_output
def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Builds the list of BlockSpec objects for NAS-FPN.

  Falls back to the fixed `NASFPN_BLOCK_SPECS` architecture when no specs
  are supplied.
  """
  specs = block_specs if block_specs else NASFPN_BLOCK_SPECS
  logging.info('Building NAS-FPN block specs: %s', specs)
  return [BlockSpec(*entry) for entry in specs]
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
NASFPN
(
tf
.
keras
.
Model
):
"""Creates a NAS-FPN model.
This implements the paper:
Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le.
NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection.
(https://arxiv.org/abs/1904.07392)
"""
def
__init__
(
self
,
input_specs
:
Mapping
[
str
,
tf
.
TensorShape
],
min_level
:
int
=
3
,
max_level
:
int
=
7
,
block_specs
:
List
[
BlockSpec
]
=
build_block_specs
(),
num_filters
:
int
=
256
,
num_repeats
:
int
=
5
,
use_separable_conv
:
bool
=
False
,
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
**
kwargs
):
"""Initializes a NAS-FPN model.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
min_level: An `int` of minimum level in FPN output feature maps.
max_level: An `int` of maximum level in FPN output feature maps.
block_specs: a list of BlockSpec objects that specifies the NAS-FPN
network topology. By default, the previously discovered architecture is
used.
num_filters: An `int` number of filters in FPN layers.
num_repeats: number of repeats for feature pyramid network.
use_separable_conv: A `bool`. If True use separable convolution for
convolution in FPN layers.
activation: A `str` name of the activation function.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A `str` name of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_config_dict
=
{
'input_specs'
:
input_specs
,
'min_level'
:
min_level
,
'max_level'
:
max_level
,
'num_filters'
:
num_filters
,
'num_repeats'
:
num_repeats
,
'use_separable_conv'
:
use_separable_conv
,
'activation'
:
activation
,
'use_sync_bn'
:
use_sync_bn
,
'norm_momentum'
:
norm_momentum
,
'norm_epsilon'
:
norm_epsilon
,
'kernel_initializer'
:
kernel_initializer
,
'kernel_regularizer'
:
kernel_regularizer
,
'bias_regularizer'
:
bias_regularizer
,
}
self
.
_min_level
=
min_level
self
.
_max_level
=
max_level
self
.
_block_specs
=
block_specs
self
.
_num_repeats
=
num_repeats
self
.
_conv_op
=
(
tf
.
keras
.
layers
.
SeparableConv2D
if
self
.
_config_dict
[
'use_separable_conv'
]
else
tf
.
keras
.
layers
.
Conv2D
)
if
self
.
_config_dict
[
'use_separable_conv'
]:
self
.
_conv_kwargs
=
{
'depthwise_initializer'
:
tf
.
keras
.
initializers
.
VarianceScaling
(
scale
=
2
,
mode
=
'fan_out'
,
distribution
=
'untruncated_normal'
),
'pointwise_initializer'
:
tf
.
keras
.
initializers
.
VarianceScaling
(
scale
=
2
,
mode
=
'fan_out'
,
distribution
=
'untruncated_normal'
),
'bias_initializer'
:
tf
.
zeros_initializer
(),
'depthwise_regularizer'
:
self
.
_config_dict
[
'kernel_regularizer'
],
'pointwise_regularizer'
:
self
.
_config_dict
[
'kernel_regularizer'
],
'bias_regularizer'
:
self
.
_config_dict
[
'bias_regularizer'
],
}
else
:
self
.
_conv_kwargs
=
{
'kernel_initializer'
:
tf
.
keras
.
initializers
.
VarianceScaling
(
scale
=
2
,
mode
=
'fan_out'
,
distribution
=
'untruncated_normal'
),
'bias_initializer'
:
tf
.
zeros_initializer
(),
'kernel_regularizer'
:
self
.
_config_dict
[
'kernel_regularizer'
],
'bias_regularizer'
:
self
.
_config_dict
[
'bias_regularizer'
],
}
self
.
_norm_op
=
(
tf
.
keras
.
layers
.
experimental
.
SyncBatchNormalization
if
self
.
_config_dict
[
'use_sync_bn'
]
else
tf
.
keras
.
layers
.
BatchNormalization
)
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_last'
:
self
.
_bn_axis
=
-
1
else
:
self
.
_bn_axis
=
1
self
.
_norm_kwargs
=
{
'axis'
:
self
.
_bn_axis
,
'momentum'
:
self
.
_config_dict
[
'norm_momentum'
],
'epsilon'
:
self
.
_config_dict
[
'norm_epsilon'
],
}
self
.
_activation
=
tf_utils
.
get_activation
(
activation
)
# Gets input feature pyramid from backbone.
inputs
=
self
.
_build_input_pyramid
(
input_specs
,
min_level
)
# Projects the input features.
feats
=
[]
for
level
in
range
(
self
.
_min_level
,
self
.
_max_level
+
1
):
if
str
(
level
)
in
inputs
.
keys
():
feats
.
append
(
self
.
_resample_feature_map
(
inputs
[
str
(
level
)],
level
,
level
,
self
.
_config_dict
[
'num_filters'
]))
else
:
feats
.
append
(
self
.
_resample_feature_map
(
feats
[
-
1
],
level
-
1
,
level
,
self
.
_config_dict
[
'num_filters'
]))
# Repeatly builds the NAS-FPN modules.
for
_
in
range
(
self
.
_num_repeats
):
output_feats
=
self
.
_build_feature_pyramid
(
feats
)
feats
=
[
output_feats
[
level
]
for
level
in
range
(
self
.
_min_level
,
self
.
_max_level
+
1
)]
self
.
_output_specs
=
{
str
(
level
):
output_feats
[
level
].
get_shape
()
for
level
in
range
(
min_level
,
max_level
+
1
)
}
output_feats
=
{
str
(
level
):
output_feats
[
level
]
for
level
in
output_feats
.
keys
()}
super
(
NASFPN
,
self
).
__init__
(
inputs
=
inputs
,
outputs
=
output_feats
,
**
kwargs
)
def
_build_input_pyramid
(
self
,
input_specs
:
Mapping
[
str
,
tf
.
TensorShape
],
min_level
:
int
):
assert
isinstance
(
input_specs
,
dict
)
if
min
(
input_specs
.
keys
())
>
str
(
min_level
):
raise
ValueError
(
'Backbone min level should be less or equal to FPN min level'
)
inputs
=
{}
for
level
,
spec
in
input_specs
.
items
():
inputs
[
level
]
=
tf
.
keras
.
Input
(
shape
=
spec
[
1
:])
return
inputs
def
_resample_feature_map
(
self
,
inputs
,
input_level
,
target_level
,
target_num_filters
=
256
):
x
=
inputs
_
,
_
,
_
,
input_num_filters
=
x
.
get_shape
().
as_list
()
if
input_num_filters
!=
target_num_filters
:
x
=
self
.
_conv_op
(
filters
=
target_num_filters
,
kernel_size
=
1
,
padding
=
'same'
,
**
self
.
_conv_kwargs
)(
x
)
x
=
self
.
_norm_op
(
**
self
.
_norm_kwargs
)(
x
)
if
input_level
<
target_level
:
stride
=
int
(
2
**
(
target_level
-
input_level
))
return
tf
.
keras
.
layers
.
MaxPool2D
(
pool_size
=
stride
,
strides
=
stride
,
padding
=
'same'
)(
x
)
if
input_level
>
target_level
:
scale
=
int
(
2
**
(
input_level
-
target_level
))
return
spatial_transform_ops
.
nearest_upsampling
(
x
,
scale
=
scale
)
# Force output x to be the same dtype as mixed precision policy. This avoids
# dtype mismatch when one input (by default float32 dtype) does not meet all
# the above conditions and is output unchanged, while other inputs are
# processed to have different dtype, e.g., using bfloat16 on TPU.
compute_dtype
=
tf
.
keras
.
layers
.
Layer
().
dtype_policy
.
compute_dtype
if
(
compute_dtype
is
not
None
)
and
(
x
.
dtype
!=
compute_dtype
):
return
tf
.
cast
(
x
,
dtype
=
compute_dtype
)
else
:
return
x
def
_global_attention
(
self
,
feat0
,
feat1
):
m
=
tf
.
math
.
reduce_max
(
feat0
,
axis
=
[
1
,
2
],
keepdims
=
True
)
m
=
tf
.
math
.
sigmoid
(
m
)
return
feat0
+
feat1
*
m
def
_build_feature_pyramid
(
self
,
feats
):
num_output_connections
=
[
0
]
*
len
(
feats
)
num_output_levels
=
self
.
_max_level
-
self
.
_min_level
+
1
feat_levels
=
list
(
range
(
self
.
_min_level
,
self
.
_max_level
+
1
))
for
i
,
block_spec
in
enumerate
(
self
.
_block_specs
):
new_level
=
block_spec
.
level
# Checks the range of input_offsets.
for
input_offset
in
block_spec
.
input_offsets
:
if
input_offset
>=
len
(
feats
):
raise
ValueError
(
'input_offset ({}) is larger than num feats({})'
.
format
(
input_offset
,
len
(
feats
)))
input0
=
block_spec
.
input_offsets
[
0
]
input1
=
block_spec
.
input_offsets
[
1
]
# Update graph with inputs.
node0
=
feats
[
input0
]
node0_level
=
feat_levels
[
input0
]
num_output_connections
[
input0
]
+=
1
node0
=
self
.
_resample_feature_map
(
node0
,
node0_level
,
new_level
)
node1
=
feats
[
input1
]
node1_level
=
feat_levels
[
input1
]
num_output_connections
[
input1
]
+=
1
node1
=
self
.
_resample_feature_map
(
node1
,
node1_level
,
new_level
)
# Combine node0 and node1 to create new feat.
if
block_spec
.
combine_fn
==
'sum'
:
new_node
=
node0
+
node1
elif
block_spec
.
combine_fn
==
'attention'
:
if
node0_level
>=
node1_level
:
new_node
=
self
.
_global_attention
(
node0
,
node1
)
else
:
new_node
=
self
.
_global_attention
(
node1
,
node0
)
else
:
raise
ValueError
(
'unknown combine_fn `{}`.'
.
format
(
block_spec
.
combine_fn
))
# Add intermediate nodes that do not have any connections to output.
if
block_spec
.
is_output
:
for
j
,
(
feat
,
feat_level
,
num_output
)
in
enumerate
(
zip
(
feats
,
feat_levels
,
num_output_connections
)):
if
num_output
==
0
and
feat_level
==
new_level
:
num_output_connections
[
j
]
+=
1
feat_
=
self
.
_resample_feature_map
(
feat
,
feat_level
,
new_level
)
new_node
+=
feat_
new_node
=
self
.
_activation
(
new_node
)
new_node
=
self
.
_conv_op
(
filters
=
self
.
_config_dict
[
'num_filters'
],
kernel_size
=
(
3
,
3
),
padding
=
'same'
,
**
self
.
_conv_kwargs
)(
new_node
)
new_node
=
self
.
_norm_op
(
**
self
.
_norm_kwargs
)(
new_node
)
feats
.
append
(
new_node
)
feat_levels
.
append
(
new_level
)
num_output_connections
.
append
(
0
)
output_feats
=
{}
for
i
in
range
(
len
(
feats
)
-
num_output_levels
,
len
(
feats
)):
level
=
feat_levels
[
i
]
output_feats
[
level
]
=
feats
[
i
]
logging
.
info
(
'Output feature pyramid: %s'
,
output_feats
)
return
output_feats
def
get_config
(
self
)
->
Mapping
[
str
,
Any
]:
return
self
.
_config_dict
@classmethod
def from_config(cls, config, custom_objects=None):
  """Recreates the layer from its `get_config()` output.

  Args:
    config: A `dict` as produced by `get_config`.
    custom_objects: Unused; present for Keras deserialization compatibility.

  Returns:
    A new instance constructed from `config`.
  """
  return cls(**config)
@property
def output_specs(self) -> Mapping[str, tf.TensorShape]:
  """A dict of {level: TensorShape} pairs for the model output."""
  return self._output_specs
@factory.register_decoder_builder('nasfpn')
def build_nasfpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds NASFPN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the NASFPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `nasfpn`.
  """
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'nasfpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `nasfpn`.')

  norm_activation = model_config.norm_activation
  # Gather all constructor arguments in one place, then instantiate.
  nasfpn_kwargs = dict(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=decoder_cfg.num_filters,
      num_repeats=decoder_cfg.num_repeats,
      use_separable_conv=decoder_cfg.use_separable_conv,
      activation=norm_activation.activation,
      use_sync_bn=norm_activation.use_sync_bn,
      norm_momentum=norm_activation.norm_momentum,
      norm_epsilon=norm_activation.norm_epsilon,
      kernel_regularizer=l2_regularizer)
  return NASFPN(**nasfpn_kwargs)
official/vision/modeling/decoders/nasfpn_test.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for NAS-FPN."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.modeling.backbones
import
resnet
from
official.vision.modeling.decoders
import
nasfpn
class NASFPNTest(parameterized.TestCase, tf.test.TestCase):
  """Smoke tests for the NAS-FPN decoder."""

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv):
    """Builds NAS-FPN on ResNet-50 endpoints and checks output shapes."""
    tf.keras.backend.set_image_data_format('channels_last')
    num_filters = 256

    backbone = resnet.ResNet(model_id=50)
    decoder = nasfpn.NASFPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv)

    images = tf.keras.Input(
        shape=(input_size, input_size, 3), batch_size=1)
    pyramid = decoder(backbone(images))

    # Every requested level must be present and downsampled by 2**level.
    for level in range(min_level, max_level + 1):
      key = str(level)
      self.assertIn(key, pyramid)
      spatial_dim = input_size // 2**level
      self.assertAllEqual(
          [1, spatial_dim, spatial_dim, num_filters],
          pyramid[key].shape.as_list())
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/factory.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
from
typing
import
Optional
import
tensorflow
as
tf
from
official.vision.configs
import
image_classification
as
classification_cfg
from
official.vision.configs
import
maskrcnn
as
maskrcnn_cfg
from
official.vision.configs
import
retinanet
as
retinanet_cfg
from
official.vision.configs
import
semantic_segmentation
as
segmentation_cfg
from
official.vision.modeling
import
backbones
from
official.vision.modeling
import
classification_model
from
official.vision.modeling
import
decoders
from
official.vision.modeling
import
maskrcnn_model
from
official.vision.modeling
import
retinanet_model
from
official.vision.modeling
import
segmentation_model
from
official.vision.modeling.heads
import
dense_prediction_heads
from
official.vision.modeling.heads
import
instance_heads
from
official.vision.modeling.heads
import
segmentation_heads
from
official.vision.modeling.layers
import
detection_generator
from
official.vision.modeling.layers
import
mask_sampler
from
official.vision.modeling.layers
import
roi_aligner
from
official.vision.modeling.layers
import
roi_generator
from
official.vision.modeling.layers
import
roi_sampler
def build_classification_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: classification_cfg.ImageClassificationModel,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    skip_logits_layer: bool = False,
    backbone: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Builds the classification model.

  Args:
    input_specs: `tf.keras.layers.InputSpec` describing the model input.
    model_config: Image classification model config.
    l2_regularizer: Optional kernel regularizer applied across the model.
    skip_logits_layer: If True, omit the final logits layer (e.g. to use the
      model as a feature extractor).
    backbone: Optional pre-built backbone; when None, one is built from
      `model_config.backbone`.

  Returns:
    A `tf.keras.Model` instance of the classification model.
  """
  norm_activation = model_config.norm_activation
  # Only build a backbone if the caller did not inject one.
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation,
        l2_regularizer=l2_regularizer)

  return classification_model.ClassificationModel(
      backbone=backbone,
      num_classes=model_config.num_classes,
      input_specs=input_specs,
      dropout_rate=model_config.dropout_rate,
      kernel_initializer=model_config.kernel_initializer,
      kernel_regularizer=l2_regularizer,
      add_head_batch_norm=model_config.add_head_batch_norm,
      use_sync_bn=norm_activation.use_sync_bn,
      norm_momentum=norm_activation.norm_momentum,
      norm_epsilon=norm_activation.norm_epsilon,
      skip_logits_layer=skip_logits_layer)
def build_maskrcnn(
    input_specs: tf.keras.layers.InputSpec,
    model_config: maskrcnn_cfg.MaskRCNN,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    backbone: Optional[tf.keras.Model] = None,
    decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Builds Mask R-CNN model.

  Assembles backbone, decoder, RPN head, detection head(s), ROI components,
  and (optionally) mask components from `model_config` into a
  `maskrcnn_model.MaskRCNNModel`.

  Args:
    input_specs: `tf.keras.layers.InputSpec` describing the image input.
    model_config: Mask R-CNN model config.
    l2_regularizer: Optional kernel regularizer applied across the model.
    backbone: Optional pre-built backbone; built from config when None.
    decoder: Optional pre-built decoder; built from config when None.

  Returns:
    A `tf.keras.Model` instance of the Mask R-CNN model.
  """
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)
  # Trace the backbone once so decoder/head weights can be created below.
  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  rpn_head_config = model_config.rpn_head
  roi_generator_config = model_config.roi_generator
  roi_sampler_config = model_config.roi_sampler
  roi_aligner_config = model_config.roi_aligner
  detection_head_config = model_config.detection_head
  generator_config = model_config.detection_generator
  num_anchors_per_location = (
      len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)

  rpn_head = dense_prediction_heads.RPNHead(
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=rpn_head_config.num_convs,
      num_filters=rpn_head_config.num_filters,
      use_separable_conv=rpn_head_config.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  detection_head = instance_heads.DetectionHead(
      num_classes=model_config.num_classes,
      num_convs=detection_head_config.num_convs,
      num_filters=detection_head_config.num_filters,
      use_separable_conv=detection_head_config.use_separable_conv,
      num_fcs=detection_head_config.num_fcs,
      fc_dims=detection_head_config.fc_dims,
      class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer,
      name='detection_head')

  # Call decoder and RPN head once so their trainable weights are built.
  if decoder:
    decoder_features = decoder(backbone_features)
    rpn_head(decoder_features)

  # For cascade R-CNN, build one detection head per cascade stage; the final
  # `detection_head` is then rebound to the list of all stage heads.
  if roi_sampler_config.cascade_iou_thresholds:
    detection_head_cascade = [detection_head]
    for cascade_num in range(len(roi_sampler_config.cascade_iou_thresholds)):
      detection_head = instance_heads.DetectionHead(
          num_classes=model_config.num_classes,
          num_convs=detection_head_config.num_convs,
          num_filters=detection_head_config.num_filters,
          use_separable_conv=detection_head_config.use_separable_conv,
          num_fcs=detection_head_config.num_fcs,
          fc_dims=detection_head_config.fc_dims,
          class_agnostic_bbox_pred=detection_head_config
          .class_agnostic_bbox_pred,
          activation=norm_activation_config.activation,
          use_sync_bn=norm_activation_config.use_sync_bn,
          norm_momentum=norm_activation_config.norm_momentum,
          norm_epsilon=norm_activation_config.norm_epsilon,
          kernel_regularizer=l2_regularizer,
          name='detection_head_{}'.format(cascade_num + 1))
      detection_head_cascade.append(detection_head)
    detection_head = detection_head_cascade

  roi_generator_obj = roi_generator.MultilevelROIGenerator(
      pre_nms_top_k=roi_generator_config.pre_nms_top_k,
      pre_nms_score_threshold=roi_generator_config.pre_nms_score_threshold,
      pre_nms_min_size_threshold=(
          roi_generator_config.pre_nms_min_size_threshold),
      nms_iou_threshold=roi_generator_config.nms_iou_threshold,
      num_proposals=roi_generator_config.num_proposals,
      test_pre_nms_top_k=roi_generator_config.test_pre_nms_top_k,
      test_pre_nms_score_threshold=(
          roi_generator_config.test_pre_nms_score_threshold),
      test_pre_nms_min_size_threshold=(
          roi_generator_config.test_pre_nms_min_size_threshold),
      test_nms_iou_threshold=roi_generator_config.test_nms_iou_threshold,
      test_num_proposals=roi_generator_config.test_num_proposals,
      use_batched_nms=roi_generator_config.use_batched_nms)

  roi_sampler_cascade = []
  roi_sampler_obj = roi_sampler.ROISampler(
      mix_gt_boxes=roi_sampler_config.mix_gt_boxes,
      num_sampled_rois=roi_sampler_config.num_sampled_rois,
      foreground_fraction=roi_sampler_config.foreground_fraction,
      foreground_iou_threshold=roi_sampler_config.foreground_iou_threshold,
      background_iou_high_threshold=(
          roi_sampler_config.background_iou_high_threshold),
      background_iou_low_threshold=(
          roi_sampler_config.background_iou_low_threshold))
  roi_sampler_cascade.append(roi_sampler_obj)
  # Initialize additional ROI samplers for cascade heads.
  if roi_sampler_config.cascade_iou_thresholds:
    for iou in roi_sampler_config.cascade_iou_thresholds:
      # Later cascade stages keep every ROI (skip_subsampling) and use the
      # stage IoU as both foreground and background-high thresholds.
      roi_sampler_obj = roi_sampler.ROISampler(
          mix_gt_boxes=False,
          num_sampled_rois=roi_sampler_config.num_sampled_rois,
          foreground_iou_threshold=iou,
          background_iou_high_threshold=iou,
          background_iou_low_threshold=0.0,
          skip_subsampling=True)
      roi_sampler_cascade.append(roi_sampler_obj)

  roi_aligner_obj = roi_aligner.MultilevelROIAligner(
      crop_size=roi_aligner_config.crop_size,
      sample_offset=roi_aligner_config.sample_offset)

  detection_generator_obj = detection_generator.DetectionGenerator(
      apply_nms=generator_config.apply_nms,
      pre_nms_top_k=generator_config.pre_nms_top_k,
      pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
      nms_iou_threshold=generator_config.nms_iou_threshold,
      max_num_detections=generator_config.max_num_detections,
      nms_version=generator_config.nms_version,
      use_cpu_nms=generator_config.use_cpu_nms,
      soft_nms_sigma=generator_config.soft_nms_sigma)

  # Mask branch is optional; all three mask components are None when disabled.
  if model_config.include_mask:
    mask_head = instance_heads.MaskHead(
        num_classes=model_config.num_classes,
        upsample_factor=model_config.mask_head.upsample_factor,
        num_convs=model_config.mask_head.num_convs,
        num_filters=model_config.mask_head.num_filters,
        use_separable_conv=model_config.mask_head.use_separable_conv,
        activation=model_config.norm_activation.activation,
        norm_momentum=model_config.norm_activation.norm_momentum,
        norm_epsilon=model_config.norm_activation.norm_epsilon,
        kernel_regularizer=l2_regularizer,
        class_agnostic=model_config.mask_head.class_agnostic)

    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=(
            model_config.mask_roi_aligner.crop_size *
            model_config.mask_head.upsample_factor),
        num_sampled_masks=model_config.mask_sampler.num_sampled_masks)

    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(
        crop_size=model_config.mask_roi_aligner.crop_size,
        sample_offset=model_config.mask_roi_aligner.sample_offset)
  else:
    mask_head = None
    mask_sampler_obj = None
    mask_roi_aligner_obj = None

  model = maskrcnn_model.MaskRCNNModel(
      backbone=backbone,
      decoder=decoder,
      rpn_head=rpn_head,
      detection_head=detection_head,
      roi_generator=roi_generator_obj,
      roi_sampler=roi_sampler_cascade,
      roi_aligner=roi_aligner_obj,
      detection_generator=detection_generator_obj,
      mask_head=mask_head,
      mask_sampler=mask_sampler_obj,
      mask_roi_aligner=mask_roi_aligner_obj,
      class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred,
      cascade_class_ensemble=detection_head_config.cascade_class_ensemble,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=model_config.anchor.num_scales,
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
  return model
def build_retinanet(
    input_specs: tf.keras.layers.InputSpec,
    model_config: retinanet_cfg.RetinaNet,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    backbone: Optional[tf.keras.Model] = None,
    # Fixed annotation: `decoder` is a Keras model (it is called on backbone
    # features below), not a regularizer; matches `build_maskrcnn`.
    decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Builds RetinaNet model.

  Args:
    input_specs: `tf.keras.layers.InputSpec` describing the image input.
    model_config: RetinaNet model config.
    l2_regularizer: Optional kernel regularizer applied across the model.
    backbone: Optional pre-built backbone; built from config when None.
    decoder: Optional pre-built decoder; built from config when None.

  Returns:
    A `tf.keras.Model` instance of the RetinaNet model.
  """
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)
  # Trace the backbone once so decoder/head weights can be created below.
  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  head_config = model_config.head
  generator_config = model_config.detection_generator
  num_anchors_per_location = (
      len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)

  head = dense_prediction_heads.RetinaNetHead(
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_classes=model_config.num_classes,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=head_config.num_convs,
      num_filters=head_config.num_filters,
      attribute_heads=[
          cfg.as_dict() for cfg in (head_config.attribute_heads or [])
      ],
      use_separable_conv=head_config.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  # Builds decoder and head so that their trainable weights are initialized
  if decoder:
    decoder_features = decoder(backbone_features)
    _ = head(decoder_features)

  detection_generator_obj = detection_generator.MultilevelDetectionGenerator(
      apply_nms=generator_config.apply_nms,
      pre_nms_top_k=generator_config.pre_nms_top_k,
      pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
      nms_iou_threshold=generator_config.nms_iou_threshold,
      max_num_detections=generator_config.max_num_detections,
      nms_version=generator_config.nms_version,
      use_cpu_nms=generator_config.use_cpu_nms,
      soft_nms_sigma=generator_config.soft_nms_sigma,
      tflite_post_processing_config=generator_config.tflite_post_processing
      .as_dict())

  model = retinanet_model.RetinaNetModel(
      backbone,
      decoder,
      head,
      detection_generator_obj,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=model_config.anchor.num_scales,
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
  return model
def build_segmentation_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: segmentation_cfg.SemanticSegmentationModel,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    # Fixed annotations: `backbone` and `decoder` are Keras models (as in
    # `build_maskrcnn`), not regularizers.
    backbone: Optional[tf.keras.Model] = None,
    decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Builds Segmentation model.

  Args:
    input_specs: `tf.keras.layers.InputSpec` describing the image input.
    model_config: Semantic segmentation model config.
    l2_regularizer: Optional kernel regularizer applied across the model.
    backbone: Optional pre-built backbone; built from config when None.
    decoder: Optional pre-built decoder; built from config when None.

  Returns:
    A `tf.keras.Model` instance of the segmentation model.
  """
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  head_config = model_config.head
  head = segmentation_heads.SegmentationHead(
      num_classes=model_config.num_classes,
      level=head_config.level,
      num_convs=head_config.num_convs,
      prediction_kernel_size=head_config.prediction_kernel_size,
      num_filters=head_config.num_filters,
      use_depthwise_convolution=head_config.use_depthwise_convolution,
      upsample_factor=head_config.upsample_factor,
      feature_fusion=head_config.feature_fusion,
      low_level=head_config.low_level,
      low_level_num_filters=head_config.low_level_num_filters,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  # Optional auxiliary head that predicts mask quality scores.
  mask_scoring_head = None
  if model_config.mask_scoring_head:
    mask_scoring_head = segmentation_heads.MaskScoring(
        num_classes=model_config.num_classes,
        **model_config.mask_scoring_head.as_dict(),
        activation=norm_activation_config.activation,
        use_sync_bn=norm_activation_config.use_sync_bn,
        norm_momentum=norm_activation_config.norm_momentum,
        norm_epsilon=norm_activation_config.norm_epsilon,
        kernel_regularizer=l2_regularizer)

  model = segmentation_model.SegmentationModel(
      backbone, decoder, head, mask_scoring_head=mask_scoring_head)
  return model
official/vision/modeling/factory_3d.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
# Import libraries
import
tensorflow
as
tf
from
official.core
import
registry
from
official.vision.configs
import
video_classification
as
video_classification_cfg
from
official.vision.modeling
import
video_classification_model
from
official.vision.modeling
import
backbones
# Registry mapping a string model-type key to its builder callable. Populated
# by the `register_model_builder` decorator and consulted by `build_model`.
_REGISTERED_MODEL_CLS = {}
def register_model_builder(key: str):
  """Decorates a builder of model class.

  The builder should be a Callable (a class or a function).
  This decorator supports registration of model builders as follows:

  ```
  class MyModel(tf.keras.Model):
    pass

  @register_model_builder('mymodel')
  def builder(input_specs, config, num_classes, l2_reg):
    return MyModel(...)

  # Builds a MyModel object.
  my_model = build_model('mymodel', input_specs, config, num_classes, l2_reg)
  ```

  Args:
    key: the key to look up the builder.

  Returns:
    A callable for use as class decorator that registers the decorated class
    for creation from an instance of model class.
  """
  return registry.register(_REGISTERED_MODEL_CLS, key)
def build_model(
    model_type: str,
    input_specs: tf.keras.layers.InputSpec,
    model_config: video_classification_cfg.hyperparams.Config,
    num_classes: int,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:
  """Builds a model from a config via the registered builder.

  Args:
    model_type: string name of model type. It should be consistent with
      ModelConfig.model_type.
    input_specs: tf.keras.layers.InputSpec.
    model_config: a OneOfConfig. Model config.
    num_classes: number of classes.
    l2_regularizer: tf.keras.regularizers.Regularizer instance. Default to None.

  Returns:
    tf.keras.Model instance of the model.
  """
  # registry.lookup raises if `model_type` was never registered.
  model_builder = registry.lookup(_REGISTERED_MODEL_CLS, model_type)
  return model_builder(input_specs, model_config, num_classes, l2_regularizer)
@register_model_builder('video_classification')
def build_video_classification_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: video_classification_cfg.VideoClassificationModel,
    num_classes: int,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:
  """Builds the video classification model.

  Args:
    input_specs: `tf.keras.layers.InputSpec` of the video ('image') input.
    model_config: Video classification model config.
    num_classes: Number of output classes.
    l2_regularizer: Optional kernel regularizer applied across the model.

  Returns:
    A `tf.keras.Model` instance of the video classification model.
  """
  backbone = backbones.factory.build_backbone(
      input_specs=input_specs,
      backbone_config=model_config.backbone,
      norm_activation_config=model_config.norm_activation,
      l2_regularizer=l2_regularizer)

  # The classification model expects a dict of named input specs.
  return video_classification_model.VideoClassificationModel(
      backbone=backbone,
      num_classes=num_classes,
      input_specs={'image': input_specs},
      dropout_rate=model_config.dropout_rate,
      aggregate_endpoints=model_config.aggregate_endpoints,
      kernel_regularizer=l2_regularizer,
      require_endpoints=model_config.require_endpoints)
official/vision/modeling/factory_test.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for factory.py."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.configs
import
backbones
from
official.vision.configs
import
backbones_3d
from
official.vision.configs
import
image_classification
as
classification_cfg
from
official.vision.configs
import
maskrcnn
as
maskrcnn_cfg
from
official.vision.configs
import
retinanet
as
retinanet_cfg
from
official.vision.configs
import
video_classification
as
video_classification_cfg
from
official.vision.modeling
import
factory
from
official.vision.modeling
import
factory_3d
class ClassificationModelBuilderTest(parameterized.TestCase, tf.test.TestCase):
  """Checks that the classification factory accepts common configurations."""

  @parameterized.parameters(
      ('resnet', (224, 224), 5e-5),
      ('resnet', (224, 224), None),
      ('resnet', (None, None), 5e-5),
      ('resnet', (None, None), None),
  )
  def test_builder(self, backbone_type, input_size, weight_decay):
    height, width = input_size
    regularizer = None
    if weight_decay:
      regularizer = tf.keras.regularizers.l2(weight_decay)

    spec = tf.keras.layers.InputSpec(shape=[None, height, width, 3])
    config = classification_cfg.ImageClassificationModel(
        num_classes=2,
        backbone=backbones.Backbone(type=backbone_type))

    # Should construct without raising.
    _ = factory.build_classification_model(
        input_specs=spec,
        model_config=config,
        l2_regularizer=regularizer)
class MaskRCNNBuilderTest(parameterized.TestCase, tf.test.TestCase):
  """Checks that the Mask R-CNN factory accepts common configurations."""

  @parameterized.parameters(
      ('resnet', (640, 640)),
      ('resnet', (None, None)),
  )
  def test_builder(self, backbone_type, input_size):
    height, width = input_size
    spec = tf.keras.layers.InputSpec(shape=[None, height, width, 3])
    config = maskrcnn_cfg.MaskRCNN(
        num_classes=2,
        backbone=backbones.Backbone(type=backbone_type))

    # Should construct without raising.
    _ = factory.build_maskrcnn(
        input_specs=spec,
        model_config=config,
        l2_regularizer=tf.keras.regularizers.l2(5e-5))
class RetinaNetBuilderTest(parameterized.TestCase, tf.test.TestCase):
  """Checks the RetinaNet factory, with and without attribute heads."""

  @parameterized.parameters(
      ('resnet', (640, 640), False),
      ('resnet', (None, None), True),
  )
  def test_builder(self, backbone_type, input_size, has_att_heads):
    height, width = input_size
    spec = tf.keras.layers.InputSpec(shape=[None, height, width, 3])

    att_heads = None
    if has_att_heads:
      att_heads = [
          retinanet_cfg.AttributeHead(name='att1'),
          retinanet_cfg.AttributeHead(
              name='att2', type='classification', size=2),
      ]

    config = retinanet_cfg.RetinaNet(
        num_classes=2,
        backbone=backbones.Backbone(type=backbone_type),
        head=retinanet_cfg.RetinaNetHead(attribute_heads=att_heads))

    # Should construct without raising.
    _ = factory.build_retinanet(
        input_specs=spec,
        model_config=config,
        l2_regularizer=tf.keras.regularizers.l2(5e-5))

    if has_att_heads:
      # Defaults should be filled in for the unspecified attribute head.
      first, second = config.head.attribute_heads
      self.assertEqual(first.as_dict(),
                       dict(name='att1', type='regression', size=1))
      self.assertEqual(second.as_dict(),
                       dict(name='att2', type='classification', size=2))
class VideoClassificationModelBuilderTest(parameterized.TestCase,
                                          tf.test.TestCase):
  """Checks that the video classification factory accepts common configs."""

  @parameterized.parameters(
      ('resnet_3d', (8, 224, 224), 5e-5),
      ('resnet_3d', (None, None, None), 5e-5),
  )
  def test_builder(self, backbone_type, input_size, weight_decay):
    frames, height, width = input_size
    regularizer = None
    if weight_decay:
      regularizer = tf.keras.regularizers.l2(weight_decay)

    spec = tf.keras.layers.InputSpec(
        shape=[None, frames, height, width, 3])
    config = video_classification_cfg.VideoClassificationModel(
        backbone=backbones_3d.Backbone3D(type=backbone_type))

    # Should construct without raising.
    _ = factory_3d.build_video_classification_model(
        input_specs=spec,
        model_config=config,
        num_classes=2,
        l2_regularizer=regularizer)
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/heads/__init__.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Heads package definition."""
from
official.vision.modeling.heads.dense_prediction_heads
import
RetinaNetHead
from
official.vision.modeling.heads.dense_prediction_heads
import
RPNHead
from
official.vision.modeling.heads.instance_heads
import
DetectionHead
from
official.vision.modeling.heads.instance_heads
import
MaskHead
from
official.vision.modeling.heads.segmentation_heads
import
SegmentationHead
official/vision/modeling/heads/dense_prediction_heads.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of dense prediction heads."""
from
typing
import
Any
,
Dict
,
List
,
Mapping
,
Optional
,
Union
# Import libraries
import
numpy
as
np
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class RetinaNetHead(tf.keras.layers.Layer):
  """Creates a RetinaNet head.

  Applies a tower of `num_convs` convolutions (conv weights shared across
  feature levels, batch norms per level) to every input level, then final
  prediction convolutions producing class scores, box offsets and, optionally,
  extra per-anchor attributes.
  """

  def __init__(self,
               min_level: int,
               max_level: int,
               num_classes: int,
               num_anchors_per_location: int,
               num_convs: int = 4,
               num_filters: int = 256,
               attribute_heads: Optional[List[Dict[str, Any]]] = None,
               use_separable_conv: bool = False,
               activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               kernel_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               bias_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               num_params_per_anchor: int = 4,
               **kwargs):
    """Initializes a RetinaNet head.

    Args:
      min_level: An `int` number of minimum feature level.
      max_level: An `int` number of maximum feature level.
      num_classes: An `int` number of classes to predict.
      num_anchors_per_location: An `int` number of anchors per pixel location.
      num_convs: An `int` number that represents the number of the intermediate
        conv layers before the prediction.
      num_filters: An `int` number that represents the number of filters of the
        intermediate conv layers.
      attribute_heads: If not None, a list that contains a dict for each
        additional attribute head. Each dict consists of 3 key-value pairs:
        `name`, `type` ('regression' or 'classification'), and `size` (number
        of predicted values for each instance).
      use_separable_conv: A `bool` that indicates whether the separable
        convolution layers is used.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      num_params_per_anchor: Number of parameters required to specify an anchor
        box. For example, `num_params_per_anchor` would be 4 for axis-aligned
        anchor boxes specified by their y-centers, x-centers, heights, and
        widths.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    # The config dict doubles as the serialization payload (see `get_config`).
    self._config_dict = {
        'min_level': min_level,
        'max_level': max_level,
        'num_classes': num_classes,
        'num_anchors_per_location': num_anchors_per_location,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'attribute_heads': attribute_heads,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'num_params_per_anchor': num_params_per_anchor,
    }

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the head."""
    conv_op = (tf.keras.layers.SeparableConv2D
               if self._config_dict['use_separable_conv']
               else tf.keras.layers.Conv2D)
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      # SeparableConv2D takes depthwise/pointwise kwargs instead of these.
      conv_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=0.01),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    # Class net. Conv weights are created only once (at min_level) and shared
    # across levels; batch norms are created per level.
    self._cls_convs = []
    self._cls_norms = []
    for level in range(self._config_dict['min_level'],
                       self._config_dict['max_level'] + 1):
      this_level_cls_norms = []
      for i in range(self._config_dict['num_convs']):
        if level == self._config_dict['min_level']:
          cls_conv_name = 'classnet-conv_{}'.format(i)
          self._cls_convs.append(conv_op(name=cls_conv_name, **conv_kwargs))
        cls_norm_name = 'classnet-conv-norm_{}_{}'.format(level, i)
        this_level_cls_norms.append(bn_op(name=cls_norm_name, **bn_kwargs))
      self._cls_norms.append(this_level_cls_norms)

    classifier_kwargs = {
        'filters': (self._config_dict['num_classes'] *
                    self._config_dict['num_anchors_per_location']),
        'kernel_size': 3,
        'padding': 'same',
        # Bias chosen so initial class probability is ~0.01 (RetinaNet prior).
        'bias_initializer': tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      classifier_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._classifier = conv_op(name='scores', **classifier_kwargs)

    # Box net. Same weight-sharing scheme as the class net.
    self._box_convs = []
    self._box_norms = []
    for level in range(self._config_dict['min_level'],
                       self._config_dict['max_level'] + 1):
      this_level_box_norms = []
      for i in range(self._config_dict['num_convs']):
        if level == self._config_dict['min_level']:
          box_conv_name = 'boxnet-conv_{}'.format(i)
          self._box_convs.append(conv_op(name=box_conv_name, **conv_kwargs))
        box_norm_name = 'boxnet-conv-norm_{}_{}'.format(level, i)
        this_level_box_norms.append(bn_op(name=box_norm_name, **bn_kwargs))
      self._box_norms.append(this_level_box_norms)

    box_regressor_kwargs = {
        'filters': (self._config_dict['num_params_per_anchor'] *
                    self._config_dict['num_anchors_per_location']),
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      box_regressor_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._box_regressor = conv_op(name='boxes', **box_regressor_kwargs)

    # Attribute learning nets.
    if self._config_dict['attribute_heads']:
      self._att_predictors = {}
      self._att_convs = {}
      self._att_norms = {}

      for att_config in self._config_dict['attribute_heads']:
        att_name = att_config['name']
        att_type = att_config['type']
        att_size = att_config['size']

        # Build conv and norm layers.
        att_convs_i = []
        att_norms_i = []
        for level in range(self._config_dict['min_level'],
                           self._config_dict['max_level'] + 1):
          this_level_att_norms = []
          for i in range(self._config_dict['num_convs']):
            if level == self._config_dict['min_level']:
              att_conv_name = '{}-conv_{}'.format(att_name, i)
              att_convs_i.append(conv_op(name=att_conv_name, **conv_kwargs))
            att_norm_name = '{}-conv-norm_{}_{}'.format(att_name, level, i)
            this_level_att_norms.append(bn_op(name=att_norm_name, **bn_kwargs))
          att_norms_i.append(this_level_att_norms)
        self._att_convs[att_name] = att_convs_i
        self._att_norms[att_name] = att_norms_i

        # Build the final prediction layer. Regression heads keep the zero
        # bias initializer already set below; classification heads use the
        # same low-prior bias as the main classifier.
        att_predictor_kwargs = {
            'filters': (att_size *
                        self._config_dict['num_anchors_per_location']),
            'kernel_size': 3,
            'padding': 'same',
            'bias_initializer': tf.zeros_initializer(),
            'bias_regularizer': self._config_dict['bias_regularizer'],
        }
        if att_type == 'classification':
          att_predictor_kwargs.update({
              'bias_initializer':
                  tf.constant_initializer(-np.log((1 - 0.01) / 0.01))
          })
        elif att_type != 'regression':
          raise ValueError(
              'Attribute head type {} not supported.'.format(att_type))
        if not self._config_dict['use_separable_conv']:
          att_predictor_kwargs.update({
              'kernel_initializer': tf.keras.initializers.RandomNormal(
                  stddev=1e-5),
              'kernel_regularizer': self._config_dict['kernel_regularizer'],
          })
        self._att_predictors[att_name] = conv_op(
            name='{}_attributes'.format(att_name), **att_predictor_kwargs)

    super().build(input_shape)

  def call(self, features: Mapping[str, tf.Tensor]):
    """Forward pass of the RetinaNet head.

    Args:
      features: A `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor`, the feature map tensors, whose shape is
            [batch, height_l, width_l, channels].

    Returns:
      scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box scores predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l, num_classes * num_anchors_per_location].
      boxes: A `dict` of `tf.Tensor` which includes coordinates of the
        predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box scores predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l,
             num_params_per_anchor * num_anchors_per_location].
      attributes: a dict of (attribute_name, attribute_prediction). Each
        `attribute_prediction` is a dict of:
        - key: `str`, the level of the multilevel predictions.
        - values: `Tensor`, the box scores predicted from a particular feature
            level, whose shape is
            [batch, height_l, width_l,
             attribute_size * num_anchors_per_location].
        Can be an empty dictionary if no attribute learning is required.
    """
    scores = {}
    boxes = {}
    if self._config_dict['attribute_heads']:
      attributes = {
          att_config['name']: {}
          for att_config in self._config_dict['attribute_heads']
      }
    else:
      attributes = {}

    for i, level in enumerate(
        range(self._config_dict['min_level'],
              self._config_dict['max_level'] + 1)):
      this_level_features = features[str(level)]

      # class net.
      x = this_level_features
      for conv, norm in zip(self._cls_convs, self._cls_norms[i]):
        x = conv(x)
        x = norm(x)
        x = self._activation(x)
      scores[str(level)] = self._classifier(x)

      # box net.
      x = this_level_features
      for conv, norm in zip(self._box_convs, self._box_norms[i]):
        x = conv(x)
        x = norm(x)
        x = self._activation(x)
      boxes[str(level)] = self._box_regressor(x)

      # attribute nets.
      if self._config_dict['attribute_heads']:
        for att_config in self._config_dict['attribute_heads']:
          att_name = att_config['name']
          x = this_level_features
          for conv, norm in zip(self._att_convs[att_name],
                                self._att_norms[att_name][i]):
            x = conv(x)
            x = norm(x)
            x = self._activation(x)
          attributes[att_name][str(level)] = self._att_predictors[att_name](x)

    return scores, boxes, attributes

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class RPNHead(tf.keras.layers.Layer):
  """Creates a Region Proposal Network (RPN) head.

  Applies a small shared conv tower (per-level batch norms) to each feature
  level, then 1x1 convolutions producing per-anchor objectness scores and
  box deltas.
  """

  def __init__(self,
               min_level: int,
               max_level: int,
               num_anchors_per_location: int,
               num_convs: int = 1,
               num_filters: int = 256,
               use_separable_conv: bool = False,
               activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               kernel_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               bias_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               **kwargs):
    """Initializes a Region Proposal Network head.

    Args:
      min_level: An `int` number of minimum feature level.
      max_level: An `int` number of maximum feature level.
      num_anchors_per_location: An `int` number of anchors per pixel location.
      num_convs: An `int` number that represents the number of the intermediate
        convolution layers before the prediction.
      num_filters: An `int` number that represents the number of filters of the
        intermediate convolution layers.
      use_separable_conv: A `bool` that indicates whether the separable
        convolution layers is used.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    # The config dict doubles as the serialization payload (see `get_config`).
    self._config_dict = {
        'min_level': min_level,
        'max_level': max_level,
        'num_anchors_per_location': num_anchors_per_location,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the head."""
    conv_op = (tf.keras.layers.SeparableConv2D
               if self._config_dict['use_separable_conv']
               else tf.keras.layers.Conv2D)
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      # SeparableConv2D takes depthwise/pointwise kwargs instead of these.
      conv_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=0.01),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    # Conv weights are created only once (at min_level) and shared across
    # levels; batch norms are created per level.
    self._convs = []
    self._norms = []
    for level in range(self._config_dict['min_level'],
                       self._config_dict['max_level'] + 1):
      this_level_norms = []
      for i in range(self._config_dict['num_convs']):
        if level == self._config_dict['min_level']:
          conv_name = 'rpn-conv_{}'.format(i)
          self._convs.append(conv_op(name=conv_name, **conv_kwargs))
        norm_name = 'rpn-conv-norm_{}_{}'.format(level, i)
        this_level_norms.append(bn_op(name=norm_name, **bn_kwargs))
      self._norms.append(this_level_norms)

    classifier_kwargs = {
        'filters': self._config_dict['num_anchors_per_location'],
        'kernel_size': 1,
        'padding': 'valid',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      classifier_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._classifier = conv_op(name='rpn-scores', **classifier_kwargs)

    box_regressor_kwargs = {
        'filters': 4 * self._config_dict['num_anchors_per_location'],
        'kernel_size': 1,
        'padding': 'valid',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      box_regressor_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._box_regressor = conv_op(name='rpn-boxes', **box_regressor_kwargs)

    super().build(input_shape)

  def call(self, features: Mapping[str, tf.Tensor]):
    """Forward pass of the RPN head.

    Args:
      features: A `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor`, the feature map tensors, whose shape is [batch,
          height_l, width_l, channels].

    Returns:
      scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box scores predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l, num_classes * num_anchors_per_location].
      boxes: A `dict` of `tf.Tensor` which includes coordinates of the
        predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box scores predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l, 4 * num_anchors_per_location].
    """
    scores = {}
    boxes = {}
    for i, level in enumerate(
        range(self._config_dict['min_level'],
              self._config_dict['max_level'] + 1)):
      x = features[str(level)]
      for conv, norm in zip(self._convs, self._norms[i]):
        x = conv(x)
        x = norm(x)
        x = self._activation(x)
      scores[str(level)] = self._classifier(x)
      boxes[str(level)] = self._box_regressor(x)
    return scores, boxes

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
official/vision/modeling/heads/dense_prediction_heads_test.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for dense_prediction_heads.py."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.modeling.heads
import
dense_prediction_heads
class RetinaNetHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for dense_prediction_heads.RetinaNetHead."""

  @parameterized.parameters(
      (False, False, False),
      (False, True, False),
      (True, False, True),
      (True, True, True),
  )
  def test_forward(self, use_separable_conv, use_sync_bn, has_att_heads):
    # One regression attribute head of size 1 when attributes are enabled.
    attribute_heads = (
        [dict(name='depth', type='regression', size=1)]
        if has_att_heads else None)
    retinanet_head = dense_prediction_heads.RetinaNetHead(
        min_level=3,
        max_level=4,
        num_classes=3,
        num_anchors_per_location=3,
        num_convs=2,
        num_filters=256,
        attribute_heads=attribute_heads,
        use_separable_conv=use_separable_conv,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
    }
    scores, boxes, attributes = retinanet_head(features)
    # num_classes * num_anchors = 9 score channels; 4 * num_anchors = 12 box
    # channels; attribute heads emit size * num_anchors = 3 channels.
    level_sizes = (('3', 128), ('4', 64))
    for level, size in level_sizes:
      self.assertAllEqual(scores[level].numpy().shape, [2, size, size, 9])
      self.assertAllEqual(boxes[level].numpy().shape, [2, size, size, 12])
    if has_att_heads:
      for att in attributes.values():
        for level, size in level_sizes:
          self.assertAllEqual(att[level].numpy().shape, [2, size, size, 3])

  def test_serialize_deserialize(self):
    retinanet_head = dense_prediction_heads.RetinaNetHead(
        min_level=3,
        max_level=7,
        num_classes=3,
        num_anchors_per_location=9,
        num_convs=2,
        num_filters=16,
        attribute_heads=None,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    # A head rebuilt from its own config must report an identical config.
    round_tripped = dense_prediction_heads.RetinaNetHead.from_config(
        retinanet_head.get_config())
    self.assertAllEqual(retinanet_head.get_config(),
                        round_tripped.get_config())
class RpnHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for dense_prediction_heads.RPNHead."""

  @parameterized.parameters(
      (False, False),
      (False, True),
      (True, False),
      (True, True),
  )
  def test_forward(self, use_separable_conv, use_sync_bn):
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=3,
        max_level=4,
        num_anchors_per_location=3,
        num_convs=2,
        num_filters=256,
        use_separable_conv=use_separable_conv,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
    }
    scores, boxes = rpn_head(features)
    # num_anchors = 3 objectness channels; 4 * num_anchors = 12 box channels.
    for level, size in (('3', 128), ('4', 64)):
      self.assertAllEqual(scores[level].numpy().shape, [2, size, size, 3])
      self.assertAllEqual(boxes[level].numpy().shape, [2, size, size, 12])

  def test_serialize_deserialize(self):
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=3,
        max_level=7,
        num_anchors_per_location=9,
        num_convs=2,
        num_filters=16,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    # A head rebuilt from its own config must report an identical config.
    round_tripped = dense_prediction_heads.RPNHead.from_config(
        rpn_head.get_config())
    self.assertAllEqual(rpn_head.get_config(), round_tripped.get_config())
# Run the tests above when this file is executed directly.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/heads/instance_heads.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of instance prediction heads."""
from
typing
import
List
,
Union
,
Optional
# Import libraries
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class DetectionHead(tf.keras.layers.Layer):
  """Creates a detection head.

  Applies `num_convs` convolutions followed by `num_fcs` fully-connected
  layers to ROI features, then predicts per-ROI class scores and box deltas.
  """

  def __init__(self,
               num_classes: int,
               num_convs: int = 0,
               num_filters: int = 256,
               use_separable_conv: bool = False,
               num_fcs: int = 2,
               fc_dims: int = 1024,
               class_agnostic_bbox_pred: bool = False,
               activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               kernel_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               bias_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               **kwargs):
    """Initializes a detection head.

    Args:
      num_classes: An `int` for the number of classes.
      num_convs: An `int` number that represents the number of the intermediate
        convolution layers before the FC layers.
      num_filters: An `int` number that represents the number of filters of the
        intermediate convolution layers.
      use_separable_conv: A `bool` that indicates whether the separable
        convolution layers is used.
      num_fcs: An `int` number that represents the number of FC layers before
        the predictions.
      fc_dims: An `int` number that represents the number of dimension of the FC
        layers.
      class_agnostic_bbox_pred: `bool`, indicating whether bboxes should be
        predicted for every class or not.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    # The config dict doubles as the serialization payload (see `get_config`).
    self._config_dict = {
        'num_classes': num_classes,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_separable_conv': use_separable_conv,
        'num_fcs': num_fcs,
        'fc_dims': fc_dims,
        'class_agnostic_bbox_pred': class_agnostic_bbox_pred,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the head."""
    conv_op = (tf.keras.layers.SeparableConv2D
               if self._config_dict['use_separable_conv']
               else tf.keras.layers.Conv2D)
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
    }
    if self._config_dict['use_separable_conv']:
      conv_kwargs.update({
          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      })
    else:
      conv_kwargs.update({
          'kernel_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      })
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    # Intermediate conv tower.
    self._convs = []
    self._conv_norms = []
    for i in range(self._config_dict['num_convs']):
      conv_name = 'detection-conv_{}'.format(i)
      self._convs.append(conv_op(name=conv_name, **conv_kwargs))
      bn_name = 'detection-conv-bn_{}'.format(i)
      self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs))

    # FC tower.
    self._fcs = []
    self._fc_norms = []
    for i in range(self._config_dict['num_fcs']):
      fc_name = 'detection-fc_{}'.format(i)
      self._fcs.append(
          tf.keras.layers.Dense(
              units=self._config_dict['fc_dims'],
              kernel_initializer=tf.keras.initializers.VarianceScaling(
                  scale=1 / 3.0, mode='fan_out', distribution='uniform'),
              kernel_regularizer=self._config_dict['kernel_regularizer'],
              bias_regularizer=self._config_dict['bias_regularizer'],
              name=fc_name))
      bn_name = 'detection-fc-bn_{}'.format(i)
      self._fc_norms.append(bn_op(name=bn_name, **bn_kwargs))

    # Final class and box predictors.
    self._classifier = tf.keras.layers.Dense(
        units=self._config_dict['num_classes'],
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'],
        name='detection-scores')

    # Either one shared box per ROI, or one box per class per ROI.
    num_box_outputs = (4 if self._config_dict['class_agnostic_bbox_pred'] else
                       self._config_dict['num_classes'] * 4)
    self._box_regressor = tf.keras.layers.Dense(
        units=num_box_outputs,
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'],
        name='detection-boxes')

    super().build(input_shape)

  def call(self, inputs: tf.Tensor, training: Optional[bool] = None):
    """Forward pass of box and class branches for the Mask-RCNN model.

    Args:
      inputs: A `tf.Tensor` of the shape [batch_size, num_instances, roi_height,
        roi_width, roi_channels], representing the ROI features.
      training: a `bool` indicating whether it is in `training` mode.
        NOTE(review): not forwarded explicitly to the norm layers here; Keras
        propagates the call-context training flag to nested layers.

    Returns:
      class_outputs: A `tf.Tensor` of the shape
        [batch_size, num_rois, num_classes], representing the class predictions.
      box_outputs: A `tf.Tensor` of the shape
        [batch_size, num_rois, num_classes * 4], representing the box
        predictions.
    """
    roi_features = inputs
    _, num_rois, height, width, filters = roi_features.get_shape().as_list()

    # Fold the ROI dimension into the batch so Conv2D sees 4-D inputs.
    x = tf.reshape(roi_features, [-1, height, width, filters])
    for conv, bn in zip(self._convs, self._conv_norms):
      x = conv(x)
      x = bn(x)
      x = self._activation(x)

    # Flatten spatial dims per ROI for the FC tower.
    _, _, _, filters = x.get_shape().as_list()
    x = tf.reshape(x, [-1, num_rois, height * width * filters])
    for fc, bn in zip(self._fcs, self._fc_norms):
      x = fc(x)
      x = bn(x)
      x = self._activation(x)

    classes = self._classifier(x)
    boxes = self._box_regressor(x)
    return classes, boxes

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class MaskHead(tf.keras.layers.Layer):
  """Creates a mask head."""

  def __init__(
      self,
      num_classes: int,
      upsample_factor: int = 2,
      num_convs: int = 4,
      num_filters: int = 256,
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      class_agnostic: bool = False,
      **kwargs):
    """Initializes a mask head.

    Args:
      num_classes: An `int` of the number of classes.
      upsample_factor: An `int` that indicates the upsample factor to generate
        the final predicted masks. It should be >= 1.
      num_convs: An `int` number that represents the number of the intermediate
        convolution layers before the mask prediction layers.
      num_filters: An `int` number that represents the number of filters of the
        intermediate convolution layers.
      use_separable_conv: A `bool` that indicates whether the separable
        convolution layers is used.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      class_agnostic: A `bool`. If set, we use a single channel mask head that
        is shared between all classes.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(MaskHead, self).__init__(**kwargs)
    # Keep all constructor arguments so `get_config` can serialize the layer.
    self._config_dict = {
        'num_classes': num_classes,
        'upsample_factor': upsample_factor,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'class_agnostic': class_agnostic
    }
    # Batch-norm axis follows the image data format (NHWC vs NCHW).
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the head."""
    conv_op = (tf.keras.layers.SeparableConv2D
               if self._config_dict['use_separable_conv']
               else tf.keras.layers.Conv2D)
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
    }
    # Separable convs take depthwise/pointwise initializers and regularizers
    # instead of a single kernel pair.
    if self._config_dict['use_separable_conv']:
      conv_kwargs.update({
          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      })
    else:
      conv_kwargs.update({
          'kernel_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      })
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    # Intermediate conv + batch-norm stack applied before upsampling.
    self._convs = []
    self._conv_norms = []
    for i in range(self._config_dict['num_convs']):
      conv_name = 'mask-conv_{}'.format(i)
      self._convs.append(conv_op(name=conv_name, **conv_kwargs))
      bn_name = 'mask-conv-bn_{}'.format(i)
      self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs))

    # Transposed conv upsamples the ROI features by `upsample_factor`
    # (kernel size == stride, 'valid' padding => exact integer upsampling).
    self._deconv = tf.keras.layers.Conv2DTranspose(
        filters=self._config_dict['num_filters'],
        kernel_size=self._config_dict['upsample_factor'],
        strides=self._config_dict['upsample_factor'],
        padding='valid',
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=2, mode='fan_out', distribution='untruncated_normal'),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'],
        name='mask-upsampling')
    self._deconv_bn = bn_op(name='mask-deconv-bn', **bn_kwargs)

    # Final 1x1 conv predicts one mask channel per class, or a single
    # shared channel in the class-agnostic case.
    if self._config_dict['class_agnostic']:
      num_filters = 1
    else:
      num_filters = self._config_dict['num_classes']
    conv_kwargs = {
        'filters': num_filters,
        'kernel_size': 1,
        'padding': 'valid',
    }
    if self._config_dict['use_separable_conv']:
      conv_kwargs.update({
          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      })
    else:
      conv_kwargs.update({
          'kernel_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      })
    self._mask_regressor = conv_op(name='mask-logits', **conv_kwargs)

    super(MaskHead, self).build(input_shape)

  def call(self, inputs: List[tf.Tensor], training: bool = None):
    """Forward pass of mask branch for the Mask-RCNN model.

    Args:
      inputs: A `list` of two tensors where
        inputs[0]: A `tf.Tensor` of shape [batch_size, num_instances,
          roi_height, roi_width, roi_channels], representing the ROI features.
        inputs[1]: A `tf.Tensor` of shape [batch_size, num_instances],
          representing the classes of the ROIs.
      training: A `bool` indicating whether it is in `training` mode.

    Returns:
      mask_outputs: A `tf.Tensor` of shape
        [batch_size, num_instances, roi_height * upsample_factor,
         roi_width * upsample_factor], representing the mask predictions.
    """
    roi_features, roi_classes = inputs
    batch_size, num_rois, height, width, filters = (
        roi_features.get_shape().as_list())
    # Static batch size may be unknown (None); fall back to the dynamic shape.
    if batch_size is None:
      batch_size = tf.shape(roi_features)[0]

    # Fold the ROI dimension into the batch dimension for the 2D conv stack.
    x = tf.reshape(roi_features, [-1, height, width, filters])
    for conv, bn in zip(self._convs, self._conv_norms):
      x = conv(x)
      x = bn(x)
      x = self._activation(x)

    x = self._deconv(x)
    x = self._deconv_bn(x)
    x = self._activation(x)

    logits = self._mask_regressor(x)

    mask_height = height * self._config_dict['upsample_factor']
    mask_width = width * self._config_dict['upsample_factor']

    if self._config_dict['class_agnostic']:
      logits = tf.reshape(logits, [-1, num_rois, mask_height, mask_width, 1])
    else:
      logits = tf.reshape(
          logits,
          [-1, num_rois, mask_height, mask_width,
           self._config_dict['num_classes']])

    # Build [batch, roi, class] indices to select, per ROI, the mask channel
    # that corresponds to its predicted class.
    batch_indices = tf.tile(
        tf.expand_dims(tf.range(batch_size), axis=1), [1, num_rois])
    mask_indices = tf.tile(
        tf.expand_dims(tf.range(num_rois), axis=0), [batch_size, 1])

    if self._config_dict['class_agnostic']:
      # All ROIs share channel 0 in the class-agnostic case.
      class_gather_indices = tf.zeros_like(roi_classes, dtype=tf.int32)
    else:
      class_gather_indices = tf.cast(roi_classes, dtype=tf.int32)

    gather_indices = tf.stack(
        [batch_indices, mask_indices, class_gather_indices],
        axis=2)
    # Move the class axis before the spatial axes so gather_nd picks a full
    # [mask_height, mask_width] map per (batch, roi, class) triple.
    mask_outputs = tf.gather_nd(
        tf.transpose(logits, [0, 1, 4, 2, 3]), gather_indices)
    return mask_outputs

  def get_config(self):
    """Returns the configuration dictionary used to build this head."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Creates a `MaskHead` from a `get_config`-style dictionary."""
    return cls(**config)
official/vision/modeling/heads/instance_heads_test.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for instance_heads.py."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.modeling.heads
import
instance_heads
class DetectionHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `instance_heads.DetectionHead`."""

  @parameterized.parameters(
      (0, 0, False, False),
      (0, 1, False, False),
      (1, 0, False, False),
      (1, 1, False, False),
  )
  def test_forward(self, num_convs, num_fcs, use_separable_conv, use_sync_bn):
    # Build a small head and verify the output shapes of both branches.
    head = instance_heads.DetectionHead(
        num_classes=3,
        num_convs=num_convs,
        num_filters=16,
        use_separable_conv=use_separable_conv,
        num_fcs=num_fcs,
        fc_dims=4,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    features = np.random.rand(2, 10, 128, 128, 16)
    class_outputs, box_outputs = head(features)
    # 3 classes -> 3 score channels and 3 * 4 = 12 box channels per ROI.
    self.assertAllEqual(class_outputs.numpy().shape, [2, 10, 3])
    self.assertAllEqual(box_outputs.numpy().shape, [2, 10, 12])

  def test_serialize_deserialize(self):
    # A head rebuilt from its own config must produce an identical config.
    head = instance_heads.DetectionHead(
        num_classes=91,
        num_convs=0,
        num_filters=256,
        use_separable_conv=False,
        num_fcs=2,
        fc_dims=1024,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    rebuilt_head = instance_heads.DetectionHead.from_config(head.get_config())
    self.assertAllEqual(head.get_config(), rebuilt_head.get_config())
class MaskHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `instance_heads.MaskHead`."""

  @parameterized.parameters(
      (1, 1, False),
      (1, 2, False),
      (2, 1, False),
      (2, 2, False),
  )
  def test_forward(self, upsample_factor, num_convs, use_sync_bn):
    head = instance_heads.MaskHead(
        num_classes=3,
        upsample_factor=upsample_factor,
        num_convs=num_convs,
        num_filters=16,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    features = np.random.rand(2, 10, 14, 14, 16)
    classes = np.zeros((2, 10))
    predicted_masks = head([features, classes])
    # The 14x14 ROI features are upsampled by `upsample_factor`.
    expected_size = 14 * upsample_factor
    self.assertAllEqual(
        predicted_masks.numpy().shape, [2, 10, expected_size, expected_size])

  def test_serialize_deserialize(self):
    # A head rebuilt from its own config must produce an identical config.
    head = instance_heads.MaskHead(
        num_classes=3,
        upsample_factor=2,
        num_convs=1,
        num_filters=256,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    rebuilt_head = instance_heads.MaskHead.from_config(head.get_config())
    self.assertAllEqual(head.get_config(), rebuilt_head.get_config())

  def test_forward_class_agnostic(self):
    # Class-agnostic masks share one channel; default upsample factor is 2,
    # so 14x14 ROI features yield 28x28 masks.
    head = instance_heads.MaskHead(num_classes=3, class_agnostic=True)
    features = np.random.rand(2, 10, 14, 14, 16)
    classes = np.zeros((2, 10))
    predicted_masks = head([features, classes])
    self.assertAllEqual(predicted_masks.numpy().shape, [2, 10, 28, 28])
# Run all test cases in this module when executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/heads/segmentation_heads.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of segmentation heads."""
from
typing
import
List
,
Union
,
Optional
,
Mapping
,
Tuple
,
Any
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.vision.modeling.layers
import
nn_layers
from
official.vision.ops
import
spatial_transform_ops
class MaskScoring(tf.keras.Model):
  """Creates a mask scoring layer.

  This implements mask scoring layer from the paper:

  Zhaojin Huang, Lichao Huang, Yongchao Gong, Chang Huang, Xinggang Wang.
  Mask Scoring R-CNN.
  (https://arxiv.org/pdf/1903.00241.pdf)
  """

  def __init__(
      self,
      num_classes: int,
      fc_input_size: List[int],
      num_convs: int = 3,
      num_filters: int = 256,
      fc_dims: int = 1024,
      num_fcs: int = 2,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes mask scoring layer.

    Args:
      num_classes: An `int` for number of classes.
      fc_input_size: A List of `int` for the input size of the
        fully connected layers.
      num_convs: An `int` for number of conv layers.
      num_filters: An `int` for the number of filters for conv layers.
      fc_dims: An `int` number of filters for each fully connected layers.
      num_fcs: An `int` for number of fully connected layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A bool, whether or not to use sync batch normalization.
      norm_momentum: A float for the momentum in BatchNorm. Defaults to 0.99.
      norm_epsilon: A float for the epsilon value in BatchNorm. Defaults to
        0.001.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(MaskScoring, self).__init__(**kwargs)
    # Keep all constructor arguments so `get_config` can serialize the model.
    self._config_dict = {
        'num_classes': num_classes,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'fc_input_size': fc_input_size,
        'fc_dims': fc_dims,
        'num_fcs': num_fcs,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'activation': activation,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    # Batch-norm axis follows the image data format (NHWC vs NCHW).
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the mask scoring head."""
    conv_op = tf.keras.layers.Conv2D
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
    }
    conv_kwargs.update({
        'kernel_initializer': tf.keras.initializers.VarianceScaling(
            scale=2, mode='fan_out', distribution='untruncated_normal'),
        'bias_initializer': tf.zeros_initializer(),
        'kernel_regularizer': self._config_dict['kernel_regularizer'],
        'bias_regularizer': self._config_dict['bias_regularizer'],
    })
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    # Conv + batch-norm stack applied to the (stop-gradient) mask logits.
    self._convs = []
    self._conv_norms = []
    for i in range(self._config_dict['num_convs']):
      conv_name = 'mask-scoring_{}'.format(i)
      self._convs.append(conv_op(name=conv_name, **conv_kwargs))
      bn_name = 'mask-scoring-bn_{}'.format(i)
      self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs))

    # Fully connected stack applied after the features are resized to
    # `fc_input_size` and flattened.
    self._fcs = []
    self._fc_norms = []
    for i in range(self._config_dict['num_fcs']):
      fc_name = 'mask-scoring-fc_{}'.format(i)
      self._fcs.append(
          tf.keras.layers.Dense(
              units=self._config_dict['fc_dims'],
              kernel_initializer=tf.keras.initializers.VarianceScaling(
                  scale=1 / 3.0, mode='fan_out', distribution='uniform'),
              kernel_regularizer=self._config_dict['kernel_regularizer'],
              bias_regularizer=self._config_dict['bias_regularizer'],
              name=fc_name))
      bn_name = 'mask-scoring-fc-bn_{}'.format(i)
      self._fc_norms.append(bn_op(name=bn_name, **bn_kwargs))

    # Final dense layer predicts one IoU score per class.
    self._classifier = tf.keras.layers.Dense(
        units=self._config_dict['num_classes'],
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'],
        name='iou-scores')

    super(MaskScoring, self).build(input_shape)

  def call(self, inputs: tf.Tensor, training: bool = None):
    """Forward pass mask scoring head.

    Args:
      inputs: A `tf.Tensor` of the shape
        [batch_size, height, width, num_classes], representing the
        segmentation logits.
      training: a `bool` indicating whether it is in `training` mode.

    Returns:
      mask_scores: A `tf.Tensor` of predicted mask scores
        [batch_size, num_classes].
    """
    # Scoring must not backpropagate into the mask logits themselves.
    x = tf.stop_gradient(inputs)
    for conv, bn in zip(self._convs, self._conv_norms):
      x = conv(x)
      x = bn(x)
      x = self._activation(x)

    # Casts feat to float32 so the resize op can be run on TPU.
    x = tf.cast(x, tf.float32)
    x = tf.image.resize(x, size=self._config_dict['fc_input_size'],
                        method=tf.image.ResizeMethod.BILINEAR)
    # Casts it back to be compatible with the rest of the operations.
    x = tf.cast(x, inputs.dtype)

    # Flatten the fixed-size spatial grid into a vector for the FC stack.
    _, h, w, filters = x.get_shape().as_list()
    x = tf.reshape(x, [-1, h * w * filters])

    for fc, bn in zip(self._fcs, self._fc_norms):
      x = fc(x)
      x = bn(x)
      x = self._activation(x)

    ious = self._classifier(x)
    return ious

  def get_config(self) -> Mapping[str, Any]:
    """Returns the configuration dictionary used to build this head."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Creates a `MaskScoring` from a `get_config`-style dictionary."""
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class SegmentationHead(tf.keras.layers.Layer):
  """Creates a segmentation head."""

  def __init__(
      self,
      num_classes: int,
      level: Union[int, str],
      num_convs: int = 2,
      num_filters: int = 256,
      use_depthwise_convolution: bool = False,
      prediction_kernel_size: int = 1,
      upsample_factor: int = 1,
      feature_fusion: Optional[str] = None,
      decoder_min_level: Optional[int] = None,
      decoder_max_level: Optional[int] = None,
      low_level: int = 2,
      low_level_num_filters: int = 48,
      num_decoder_filters: int = 256,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a segmentation head.

    Args:
      num_classes: An `int` number of mask classification categories. The
        number of classes does not include background class.
      level: An `int` or `str`, level to use to build segmentation head.
      num_convs: An `int` number of stacked convolution before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      use_depthwise_convolution: A bool to specify if use depthwise separable
        convolutions.
      prediction_kernel_size: An `int` number to specify the kernel size of
        the prediction layer.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`,
        `panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from
        decoder_features[level] will be fused with low level feature maps from
        backbone. If `pyramid_fusion`, multiscale features will be resized and
        fused at the target level.
      decoder_min_level: An `int` of minimum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      decoder_max_level: An `int` of maximum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      low_level: An `int` of backbone level to be used for feature fusion. It
        is used when feature_fusion is set to `deeplabv3plus`.
      low_level_num_filters: An `int` of reduced number of filters for the low
        level features before fusing it with higher level features. It is only
        used when feature_fusion is set to `deeplabv3plus`.
      num_decoder_filters: An `int` of number of filters in the decoder
        outputs. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      activation: A `str` that indicates which activation is used, e.g.
        'relu', 'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(SegmentationHead, self).__init__(**kwargs)
    # Keep all constructor arguments so `get_config` can serialize the layer.
    self._config_dict = {
        'num_classes': num_classes,
        'level': level,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_depthwise_convolution': use_depthwise_convolution,
        'prediction_kernel_size': prediction_kernel_size,
        'upsample_factor': upsample_factor,
        'feature_fusion': feature_fusion,
        'decoder_min_level': decoder_min_level,
        'decoder_max_level': decoder_max_level,
        'low_level': low_level,
        'low_level_num_filters': low_level_num_filters,
        'num_decoder_filters': num_decoder_filters,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer
    }
    # Batch-norm axis follows the image data format (NHWC vs NCHW).
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the segmentation head."""
    use_depthwise_convolution = self._config_dict['use_depthwise_convolution']
    random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)
    conv_op = tf.keras.layers.Conv2D
    # With depthwise convolution the 3x3 spatial mixing is done by the
    # DepthwiseConv2D below, so the pointwise conv here uses kernel size 1.
    conv_kwargs = {
        'kernel_size': 3 if not use_depthwise_convolution else 1,
        'padding': 'same',
        'use_bias': False,
        'kernel_initializer': random_initializer,
        'kernel_regularizer': self._config_dict['kernel_regularizer'],
    }
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    if self._config_dict['feature_fusion'] == 'deeplabv3plus':
      # Deeplabv3+ feature fusion layers.
      self._dlv3p_conv = conv_op(
          kernel_size=1,
          padding='same',
          use_bias=False,
          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
          kernel_regularizer=self._config_dict['kernel_regularizer'],
          name='segmentation_head_deeplabv3p_fusion_conv',
          filters=self._config_dict['low_level_num_filters'])

      self._dlv3p_norm = bn_op(
          name='segmentation_head_deeplabv3p_fusion_norm', **bn_kwargs)

    elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion':
      self._panoptic_fpn_fusion = nn_layers.PanopticFPNFusion(
          min_level=self._config_dict['decoder_min_level'],
          max_level=self._config_dict['decoder_max_level'],
          target_level=self._config_dict['level'],
          num_filters=self._config_dict['num_filters'],
          num_fpn_filters=self._config_dict['num_decoder_filters'],
          activation=self._config_dict['activation'],
          kernel_regularizer=self._config_dict['kernel_regularizer'],
          bias_regularizer=self._config_dict['bias_regularizer'])

    # Segmentation head layers.
    self._convs = []
    self._norms = []
    for i in range(self._config_dict['num_convs']):
      if use_depthwise_convolution:
        # Depthwise 3x3 followed by its own norm; the pointwise conv and
        # norm are appended right after in the same iteration.
        self._convs.append(
            tf.keras.layers.DepthwiseConv2D(
                name='segmentation_head_depthwise_conv_{}'.format(i),
                kernel_size=3,
                padding='same',
                use_bias=False,
                depthwise_initializer=random_initializer,
                depthwise_regularizer=self._config_dict['kernel_regularizer'],
                depth_multiplier=1))
        norm_name = 'segmentation_head_depthwise_norm_{}'.format(i)
        self._norms.append(bn_op(name=norm_name, **bn_kwargs))
      conv_name = 'segmentation_head_conv_{}'.format(i)
      self._convs.append(
          conv_op(
              name=conv_name,
              filters=self._config_dict['num_filters'],
              **conv_kwargs))
      norm_name = 'segmentation_head_norm_{}'.format(i)
      self._norms.append(bn_op(name=norm_name, **bn_kwargs))

    # Final conv predicts per-class segmentation logits.
    self._classifier = conv_op(
        name='segmentation_output',
        filters=self._config_dict['num_classes'],
        kernel_size=self._config_dict['prediction_kernel_size'],
        padding='same',
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'])

    super().build(input_shape)

  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
                               Union[tf.Tensor, Mapping[str, tf.Tensor]]]):
    """Forward pass of the segmentation head.

    It supports both a tuple of 2 tensors or 2 dictionaries. The first is
    backbone endpoints, and the second is decoder endpoints. When inputs are
    tensors, they are from a single level of feature maps. When inputs are
    dictionaries, they contain multiple levels of feature maps, where the key
    is the index of feature map.

    Args:
      inputs: A tuple of 2 feature map tensors of shape
        [batch, height_l, width_l, channels] or 2 dictionaries of tensors:
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
            [batch, height_l, width_l, channels].
        The first is backbone endpoints, and the second is decoder endpoints.

    Returns:
      segmentation prediction mask: A `tf.Tensor` of the segmentation mask
        scores predicted from input features.
    """
    backbone_output = inputs[0]
    decoder_output = inputs[1]
    if self._config_dict['feature_fusion'] == 'deeplabv3plus':
      # deeplabv3+ feature fusion: concat the (resized) decoder features at
      # `level` with the projected low-level backbone features.
      x = decoder_output[str(self._config_dict['level'])] if isinstance(
          decoder_output, dict) else decoder_output
      y = backbone_output[str(self._config_dict['low_level'])] if isinstance(
          backbone_output, dict) else backbone_output
      y = self._dlv3p_norm(self._dlv3p_conv(y))
      y = self._activation(y)

      x = tf.image.resize(
          x, tf.shape(y)[1:3], method=tf.image.ResizeMethod.BILINEAR)
      x = tf.cast(x, dtype=y.dtype)
      x = tf.concat([x, y], axis=self._bn_axis)
    elif self._config_dict['feature_fusion'] == 'pyramid_fusion':
      if not isinstance(decoder_output, dict):
        raise ValueError('Only support dictionary decoder_output.')
      x = nn_layers.pyramid_feature_fusion(decoder_output,
                                           self._config_dict['level'])
    elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion':
      x = self._panoptic_fpn_fusion(decoder_output)
    else:
      # No fusion: use decoder features at `level` directly.
      x = decoder_output[str(self._config_dict['level'])] if isinstance(
          decoder_output, dict) else decoder_output

    for conv, norm in zip(self._convs, self._norms):
      x = conv(x)
      x = norm(x)
      x = self._activation(x)
    if self._config_dict['upsample_factor'] > 1:
      x = spatial_transform_ops.nearest_upsampling(
          x, scale=self._config_dict['upsample_factor'])

    return self._classifier(x)

  def get_config(self):
    """Returns the base layer config merged with this head's config."""
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(self._config_dict.items()))

  @classmethod
  def from_config(cls, config):
    """Creates a `SegmentationHead` from a `get_config`-style dictionary."""
    return cls(**config)
official/vision/modeling/heads/segmentation_heads_test.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for segmentation_heads.py."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.modeling.heads
import
segmentation_heads
class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `segmentation_heads.SegmentationHead`."""

  @parameterized.parameters(
      (2, 'pyramid_fusion', None, None),
      (3, 'pyramid_fusion', None, None),
      (2, 'panoptic_fpn_fusion', 2, 5),
      (2, 'panoptic_fpn_fusion', 2, 6),
      (3, 'panoptic_fpn_fusion', 3, 5),
      (3, 'panoptic_fpn_fusion', 3, 6))
  def test_forward(self, level, feature_fusion,
                   decoder_min_level, decoder_max_level):
    backbone_features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
        '5': np.random.rand(2, 32, 32, 16),
    }
    decoder_features = {
        '3': np.random.rand(2, 128, 128, 64),
        '4': np.random.rand(2, 64, 64, 64),
        '5': np.random.rand(2, 32, 32, 64),
        '6': np.random.rand(2, 16, 16, 64),
    }

    if feature_fusion == 'panoptic_fpn_fusion':
      backbone_features['2'] = np.random.rand(2, 256, 256, 16)
      decoder_features['2'] = np.random.rand(2, 256, 256, 64)

    head = segmentation_heads.SegmentationHead(
        num_classes=10,
        level=level,
        feature_fusion=feature_fusion,
        decoder_min_level=decoder_min_level,
        decoder_max_level=decoder_max_level,
        num_decoder_filters=64)

    logits = head((backbone_features, decoder_features))

    # BUG FIX: `level` is an int while `decoder_features` is keyed by
    # strings, so `level in decoder_features` was always False and the
    # shape assertion below was never executed. Compare against the string
    # key so the check actually runs.
    if str(level) in decoder_features:
      # Output spatial size must match the decoder feature map at `level`.
      self.assertAllEqual(logits.numpy().shape, [
          2, decoder_features[str(level)].shape[1],
          decoder_features[str(level)].shape[2], 10
      ])

  def test_serialize_deserialize(self):
    # A head rebuilt from its own config must produce an identical config.
    head = segmentation_heads.SegmentationHead(num_classes=10, level=3)
    config = head.get_config()
    new_head = segmentation_heads.SegmentationHead.from_config(config)
    self.assertAllEqual(head.get_config(), new_head.get_config())
class MaskScoringHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `segmentation_heads.MaskScoring`."""

  @parameterized.parameters(
      (1, 1, 64, [4, 4]),
      (2, 1, 64, [4, 4]),
      (3, 1, 64, [4, 4]),
      (1, 2, 32, [8, 8]),
      (2, 2, 32, [8, 8]),
      (3, 2, 32, [8, 8]),)
  def test_forward(self, num_convs, num_fcs, num_filters, fc_input_size):
    features = np.random.rand(2, 64, 64, 16)
    # BUG FIX: `num_fcs` was parameterized but never forwarded to the head,
    # so the fully-connected-stack depth was never actually varied. Pass it
    # through so all parameter combinations are exercised.
    head = segmentation_heads.MaskScoring(
        num_classes=2,
        num_convs=num_convs,
        num_filters=num_filters,
        num_fcs=num_fcs,
        fc_dims=128,
        fc_input_size=fc_input_size)
    scores = head(features)
    # One IoU score per class for each example in the batch.
    self.assertAllEqual(scores.numpy().shape, [2, 2])

  def test_serialize_deserialize(self):
    # A head rebuilt from its own config must produce an identical config.
    head = segmentation_heads.MaskScoring(
        num_classes=2, fc_input_size=[4, 4], fc_dims=128)
    config = head.get_config()
    new_head = segmentation_heads.MaskScoring.from_config(config)
    self.assertAllEqual(head.get_config(), new_head.get_config())
# Run all tests in this module via the TensorFlow test runner.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/layers/__init__.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Layers package definition."""
from
official.vision.modeling.layers.box_sampler
import
BoxSampler
from
official.vision.modeling.layers.detection_generator
import
DetectionGenerator
from
official.vision.modeling.layers.detection_generator
import
MultilevelDetectionGenerator
from
official.vision.modeling.layers.mask_sampler
import
MaskSampler
from
official.vision.modeling.layers.nn_blocks
import
BottleneckBlock
from
official.vision.modeling.layers.nn_blocks
import
BottleneckResidualInner
from
official.vision.modeling.layers.nn_blocks
import
DepthwiseSeparableConvBlock
from
official.vision.modeling.layers.nn_blocks
import
InvertedBottleneckBlock
from
official.vision.modeling.layers.nn_blocks
import
ResidualBlock
from
official.vision.modeling.layers.nn_blocks
import
ResidualInner
from
official.vision.modeling.layers.nn_blocks
import
ReversibleLayer
from
official.vision.modeling.layers.nn_blocks_3d
import
BottleneckBlock3D
from
official.vision.modeling.layers.nn_blocks_3d
import
SelfGating
from
official.vision.modeling.layers.nn_layers
import
CausalConvMixin
from
official.vision.modeling.layers.nn_layers
import
Conv2D
from
official.vision.modeling.layers.nn_layers
import
Conv3D
from
official.vision.modeling.layers.nn_layers
import
DepthwiseConv2D
from
official.vision.modeling.layers.nn_layers
import
GlobalAveragePool3D
from
official.vision.modeling.layers.nn_layers
import
PositionalEncoding
from
official.vision.modeling.layers.nn_layers
import
Scale
from
official.vision.modeling.layers.nn_layers
import
SpatialAveragePool3D
from
official.vision.modeling.layers.nn_layers
import
SqueezeExcitation
from
official.vision.modeling.layers.nn_layers
import
StochasticDepth
from
official.vision.modeling.layers.nn_layers
import
TemporalSoftmaxPool
from
official.vision.modeling.layers.roi_aligner
import
MultilevelROIAligner
from
official.vision.modeling.layers.roi_generator
import
MultilevelROIGenerator
from
official.vision.modeling.layers.roi_sampler
import
ROISampler
official/vision/modeling/layers/box_sampler.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of box sampler."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.ops
import
sampling_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class BoxSampler(tf.keras.layers.Layer):
  """Samples a fixed-size, class-balanced subset of candidate boxes."""

  def __init__(self,
               num_samples: int = 512,
               foreground_fraction: float = 0.25,
               **kwargs):
    """Initializes a box sampler.

    Args:
      num_samples: An `int` of the number of sampled boxes per image.
      foreground_fraction: A `float` in [0, 1], what percentage of boxes
        should be sampled from the positive examples.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'num_samples': num_samples,
        'foreground_fraction': foreground_fraction,
    }
    super().__init__(**kwargs)

  def call(self,
           positive_matches: tf.Tensor,
           negative_matches: tf.Tensor,
           ignored_matches: tf.Tensor):
    """Samples and selects positive and negative instances.

    Args:
      positive_matches: A `bool` tensor of shape [batch, N]; `True` marks
        an instance as a positive example.
      negative_matches: A `bool` tensor of shape [batch, N]; `True` marks
        an instance as a negative example.
      ignored_matches: A `bool` tensor of shape [batch, N]; `True` marks
        an instance that should be ignored.

    Returns:
      A `tf.Tensor` of shape [batch_size, K] holding the indices of the
      sampled examples, where K is `num_samples`.
    """
    # Eligible = (positive or negative) and not explicitly ignored.
    eligible = tf.logical_and(
        tf.logical_or(positive_matches, negative_matches),
        tf.logical_not(ignored_matches))
    num_samples = self._config_dict['num_samples']
    balanced_sampler = sampling_ops.BalancedPositiveNegativeSampler(
        positive_fraction=self._config_dict['foreground_fraction'],
        is_static=True)
    # Subsample each image in the batch independently, then re-batch.
    per_image_indicators = [
        balanced_sampler.subsample(eligible[b], num_samples,
                                   positive_matches[b])
        for b in range(eligible.shape[0])
    ]
    indicators = tf.stack(per_image_indicators)
    # top_k over the 0/1 indicators yields the chosen indices first.
    _, selected_indices = tf.nn.top_k(
        tf.cast(indicators, dtype=tf.int32), k=num_samples, sorted=True)
    return selected_indices

  def get_config(self):
    """Returns the layer configuration dictionary."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Builds a `BoxSampler` from a `get_config` dictionary."""
    return cls(**config)
official/vision/modeling/layers/deeplab.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Layers for DeepLabV3."""
import
tensorflow
as
tf
class SpatialPyramidPooling(tf.keras.layers.Layer):
  """Implements the Atrous Spatial Pyramid Pooling.

  References:
    [Rethinking Atrous Convolution for Semantic Image Segmentation](
      https://arxiv.org/pdf/1706.05587.pdf)
    [Encoder-Decoder with Atrous Separable Convolution for Semantic Image
      Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
  """

  def __init__(
      self,
      output_channels,
      dilation_rates,
      pool_kernel_size=None,
      use_sync_bn=False,
      batchnorm_momentum=0.99,
      batchnorm_epsilon=0.001,
      activation='relu',
      dropout=0.5,
      kernel_initializer='glorot_uniform',
      kernel_regularizer=None,
      interpolation='bilinear',
      use_depthwise_convolution=False,
      **kwargs):
    """Initializes `SpatialPyramidPooling`.

    Args:
      output_channels: Number of channels produced by SpatialPyramidPooling.
      dilation_rates: A list of integers for parallel dilated conv.
      pool_kernel_size: A list of integers or None. If None, global average
        pooling is applied, otherwise an average pooling of pool_kernel_size
        is applied.
      use_sync_bn: A bool, whether or not to use sync batch normalization.
      batchnorm_momentum: A float for the momentum in BatchNorm. Defaults to
        0.99.
      batchnorm_epsilon: A float for the epsilon value in BatchNorm. Defaults
        to 0.001.
      activation: A `str` for type of activation to be used. Defaults to
        'relu'.
      dropout: A float for the dropout rate before output. Defaults to 0.5.
      kernel_initializer: Kernel initializer for conv layers. Defaults to
        `glorot_uniform`.
      kernel_regularizer: Kernel regularizer for conv layers. Defaults to
        None.
      interpolation: The interpolation method for upsampling. Defaults to
        `bilinear`.
      use_depthwise_convolution: Allows spatial pooling to be separable
        depthwise convolutions. [Encoder-Decoder with Atrous Separable
        Convolution for Semantic Image Segmentation](
        https://arxiv.org/pdf/1802.02611.pdf)
      **kwargs: Other keyword arguments for the layer.
    """
    super(SpatialPyramidPooling, self).__init__(**kwargs)

    self.output_channels = output_channels
    self.dilation_rates = dilation_rates
    self.use_sync_bn = use_sync_bn
    self.batchnorm_momentum = batchnorm_momentum
    self.batchnorm_epsilon = batchnorm_epsilon
    self.activation = activation
    self.dropout = dropout
    self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self.interpolation = interpolation
    # The layer requires NHWC rank-4 inputs.
    self.input_spec = tf.keras.layers.InputSpec(ndim=4)
    self.pool_kernel_size = pool_kernel_size
    self.use_depthwise_convolution = use_depthwise_convolution

  def build(self, input_shape):
    height = input_shape[1]
    width = input_shape[2]
    channels = input_shape[3]

    self.aspp_layers = []

    if self.use_sync_bn:
      bn_op = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      bn_op = tf.keras.layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Branch 1: a plain 1x1 conv projection.
    conv_sequential = tf.keras.Sequential([
        tf.keras.layers.Conv2D(
            filters=self.output_channels,
            kernel_size=(1, 1),
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            use_bias=False),
        bn_op(
            axis=bn_axis,
            momentum=self.batchnorm_momentum,
            epsilon=self.batchnorm_epsilon),
        tf.keras.layers.Activation(self.activation)
    ])
    self.aspp_layers.append(conv_sequential)

    # Branches 2..N: one dilated 3x3 conv per dilation rate; optionally
    # split into depthwise 3x3 + pointwise 1x1 (separable form).
    for dilation_rate in self.dilation_rates:
      leading_layers = []
      kernel_size = (3, 3)
      if self.use_depthwise_convolution:
        leading_layers += [
            tf.keras.layers.DepthwiseConv2D(
                depth_multiplier=1,
                kernel_size=kernel_size,
                padding='same',
                depthwise_regularizer=self.kernel_regularizer,
                depthwise_initializer=self.kernel_initializer,
                dilation_rate=dilation_rate,
                use_bias=False)
        ]
        kernel_size = (1, 1)
      conv_sequential = tf.keras.Sequential(leading_layers + [
          tf.keras.layers.Conv2D(
              filters=self.output_channels,
              kernel_size=kernel_size,
              padding='same',
              kernel_regularizer=self.kernel_regularizer,
              kernel_initializer=self.kernel_initializer,
              dilation_rate=dilation_rate,
              use_bias=False),
          bn_op(
              axis=bn_axis,
              momentum=self.batchnorm_momentum,
              epsilon=self.batchnorm_epsilon),
          tf.keras.layers.Activation(self.activation)
      ])
      self.aspp_layers.append(conv_sequential)

    # Image-pooling branch: global (or windowed) average pool, 1x1 conv,
    # then resize back to the input spatial size.
    if self.pool_kernel_size is None:
      pool_sequential = tf.keras.Sequential([
          tf.keras.layers.GlobalAveragePooling2D(),
          tf.keras.layers.Reshape((1, 1, channels))
      ])
    else:
      pool_sequential = tf.keras.Sequential(
          [tf.keras.layers.AveragePooling2D(self.pool_kernel_size)])

    pool_sequential.add(
        tf.keras.Sequential([
            tf.keras.layers.Conv2D(
                filters=self.output_channels,
                kernel_size=(1, 1),
                kernel_initializer=self.kernel_initializer,
                kernel_regularizer=self.kernel_regularizer,
                use_bias=False),
            bn_op(
                axis=bn_axis,
                momentum=self.batchnorm_momentum,
                epsilon=self.batchnorm_epsilon),
            tf.keras.layers.Activation(self.activation),
            tf.keras.layers.experimental.preprocessing.Resizing(
                height,
                width,
                interpolation=self.interpolation,
                dtype=tf.float32)
        ]))

    self.aspp_layers.append(pool_sequential)

    # Final projection of the concatenated branches.
    self.projection = tf.keras.Sequential([
        tf.keras.layers.Conv2D(
            filters=self.output_channels,
            kernel_size=(1, 1),
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            use_bias=False),
        bn_op(
            axis=bn_axis,
            momentum=self.batchnorm_momentum,
            epsilon=self.batchnorm_epsilon),
        tf.keras.layers.Activation(self.activation),
        tf.keras.layers.Dropout(rate=self.dropout)
    ])

  def call(self, inputs, training=None):
    if training is None:
      training = tf.keras.backend.learning_phase()
    result = []
    for layer in self.aspp_layers:
      # Cast back to the input dtype: the resizing sub-layer runs in
      # float32 regardless of the compute policy.
      result.append(tf.cast(layer(inputs, training=training), inputs.dtype))
    result = tf.concat(result, axis=-1)
    result = self.projection(result, training=training)
    return result

  def get_config(self):
    config = {
        'output_channels': self.output_channels,
        'dilation_rates': self.dilation_rates,
        'pool_kernel_size': self.pool_kernel_size,
        'use_sync_bn': self.use_sync_bn,
        'batchnorm_momentum': self.batchnorm_momentum,
        'batchnorm_epsilon': self.batchnorm_epsilon,
        'activation': self.activation,
        'dropout': self.dropout,
        'kernel_initializer': tf.keras.initializers.serialize(
            self.kernel_initializer),
        'kernel_regularizer': tf.keras.regularizers.serialize(
            self.kernel_regularizer),
        'interpolation': self.interpolation,
        # Bug fix: this key was missing, so a get_config/from_config round
        # trip silently reset `use_depthwise_convolution` to its default.
        'use_depthwise_convolution': self.use_depthwise_convolution,
    }
    base_config = super(SpatialPyramidPooling, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
official/vision/modeling/layers/deeplab_test.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for ASPP."""
import
tensorflow
as
tf
from
tensorflow.python.keras
import
keras_parameterized
from
official.vision.modeling.layers
import
deeplab
@keras_parameterized.run_all_keras_modes
class DeeplabTest(keras_parameterized.TestCase):
  """Tests for deeplab.SpatialPyramidPooling (ASPP)."""

  @keras_parameterized.parameterized.parameters(
      (None,),
      ([32, 32],),
  )
  def test_aspp(self, pool_kernel_size):
    """ASPP preserves spatial size and emits `output_channels` channels."""
    inputs = tf.keras.Input(shape=(64, 64, 128), dtype=tf.float32)
    layer = deeplab.SpatialPyramidPooling(
        output_channels=256,
        dilation_rates=[6, 12, 18],
        # Bug fix: this was hard-coded to None, so the ([32, 32],) case
        # never exercised the AveragePooling2D branch. The expected output
        # shape is unchanged because the pooling branch resizes back to the
        # input spatial size.
        pool_kernel_size=pool_kernel_size)
    output = layer(inputs)
    self.assertAllEqual([None, 64, 64, 256], output.shape)

  def test_aspp_invalid_shape(self):
    """A rank-3 input must be rejected (the layer requires ndim=4)."""
    inputs = tf.keras.Input(shape=(64, 64), dtype=tf.float32)
    layer = deeplab.SpatialPyramidPooling(
        output_channels=256, dilation_rates=[6, 12, 18])
    with self.assertRaises(ValueError):
      _ = layer(inputs)

  def test_config_with_custom_name(self):
    """A custom layer name survives a config round trip."""
    layer = deeplab.SpatialPyramidPooling(256, [5], name='aspp')
    config = layer.get_config()
    layer_1 = deeplab.SpatialPyramidPooling.from_config(config)
    self.assertEqual(layer_1.name, layer.name)
# Run all tests in this module via the TensorFlow test runner.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/layers/detection_generator.py
0 → 100644
View file @
c8e6faf7
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of generators to generate the final detections."""
import
contextlib
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Mapping
,
Sequence
# Import libraries
import
tensorflow
as
tf
from
official.vision.ops
import
box_ops
from
official.vision.ops
import
nms
from
official.vision.ops
import
preprocess_ops
def _generate_detections_v1(boxes: tf.Tensor,
                            scores: tf.Tensor,
                            attributes: Optional[Mapping[str,
                                                         tf.Tensor]] = None,
                            pre_nms_top_k: int = 5000,
                            pre_nms_score_threshold: float = 0.05,
                            nms_iou_threshold: float = 0.5,
                            max_num_detections: int = 100,
                            soft_nms_sigma: Optional[float] = None):
  """Generates the final detections given the model outputs.

  Unrolls the batch dimension and processes images one by one via
  `_generate_detections_per_image`. Requires a statically known batch size
  and is TPU compatible.

  Args:
    boxes: A `tf.Tensor` of shape `[batch_size, N, num_classes, 4]` or
      `[batch_size, N, 1, 4]` with box predictions over all anchors N.
    scores: A `tf.Tensor` of shape `[batch_size, N, num_classes]` with raw
      class scores over all anchors N.
    attributes: None or a dict mapping attribute name to a `tf.Tensor` of
      shape `[batch_size, N, num_classes, attribute_size]` or
      `[batch_size, N, 1, attribute_size]`.
    pre_nms_top_k: An `int`, number of top candidates per class before NMS.
    pre_nms_score_threshold: A `float` score threshold applied before NMS.
    nms_iou_threshold: A `float` IoU threshold used by NMS.
    max_num_detections: Maximum number of boxes retained over all classes.
    soft_nms_sigma: A `float` Soft-NMS sigma; 0.0 is standard NMS. If None,
      padded (non-soft) NMS is used.

  Returns:
    nms_boxes: `[batch_size, max_num_detections, 4]` boxes in
      `[y1, x1, y2, x2]`.
    nms_scores: `[batch_size, max_num_detections]` sorted scores in [0, 1].
    nms_classes: `[batch_size, max_num_detections]` integer classes.
    valid_detections: `[batch_size]` count of valid detections per image.
    nms_attributes: dict of `[batch_size, max_num_detections,
      attribute_size]` tensors; empty dict when `attributes` is None.
  """
  with tf.name_scope('generate_detections'):
    batch_size = scores.get_shape().as_list()[0]
    boxes_out = []
    scores_out = []
    classes_out = []
    valid_out = []
    attributes_out = {name: [] for name in attributes} if attributes else {}

    # Unroll the batch: run single-image detection generation per image.
    for image_idx in range(batch_size):
      per_image_attributes = (
          {name: value[image_idx] for name, value in attributes.items()}
          if attributes else {})
      (boxes_i, scores_i, classes_i, valid_i,
       attributes_i) = _generate_detections_per_image(
           boxes[image_idx],
           scores[image_idx],
           attributes=per_image_attributes,
           pre_nms_top_k=pre_nms_top_k,
           pre_nms_score_threshold=pre_nms_score_threshold,
           nms_iou_threshold=nms_iou_threshold,
           max_num_detections=max_num_detections,
           soft_nms_sigma=soft_nms_sigma)
      boxes_out.append(boxes_i)
      scores_out.append(scores_i)
      classes_out.append(classes_i)
      valid_out.append(valid_i)
      for name in attributes_out:
        attributes_out[name].append(attributes_i[name])

    # Re-batch the per-image results.
    nmsed_boxes = tf.stack(boxes_out, axis=0)
    nmsed_scores = tf.stack(scores_out, axis=0)
    nmsed_classes = tf.stack(classes_out, axis=0)
    valid_detections = tf.stack(valid_out, axis=0)
    nmsed_attributes = {
        name: tf.stack(values, axis=0)
        for name, values in attributes_out.items()
    }
  return (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections,
          nmsed_attributes)
def _generate_detections_per_image(
    boxes: tf.Tensor,
    scores: tf.Tensor,
    attributes: Optional[Mapping[str, tf.Tensor]] = None,
    pre_nms_top_k: int = 5000,
    pre_nms_score_threshold: float = 0.05,
    nms_iou_threshold: float = 0.5,
    max_num_detections: int = 100,
    soft_nms_sigma: Optional[float] = None):
  """Generates the final detections per image given the model outputs.

  Args:
    boxes: A `tf.Tensor` with shape `[N, num_classes, 4]` or `[N, 1, 4]`, which
      box predictions on all feature levels. The N is the number of total
      anchors on all levels.
    scores: A `tf.Tensor` with shape `[N, num_classes]`, which stacks class
      probability on all feature levels. The N is the number of total anchors
      on all levels. The num_classes is the number of classes predicted by the
      model. Note that the class_outputs here is the raw score.
    attributes: If not None, a dict of `tf.Tensor`. Each value is in shape
      `[N, num_classes, attribute_size]` or `[N, 1, attribute_size]` of
      attribute predictions on all feature levels. The N is the number of
      total anchors on all levels.
    pre_nms_top_k: An `int` number of top candidate detections per class
      before NMS.
    pre_nms_score_threshold: A `float` representing the threshold for deciding
      when to remove boxes based on score.
    nms_iou_threshold: A `float` representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    max_num_detections: A `scalar` representing maximum number of boxes
      retained over all classes.
    soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
      When soft_nms_sigma=0.0, we fall back to standard NMS.
      If set to None, `tf.image.non_max_suppression_padded` is called instead.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape `[max_num_detections, 4]`
      representing top detected boxes in `[y1, x1, y2, x2]`.
    nms_scores: A `float` tf.Tensor of shape `[max_num_detections]`
      representing sorted confidence scores for detected boxes. The values
      are between [0, 1].
    nms_classes: An `int` tf.Tensor of shape `[max_num_detections]`
      representing classes for detected boxes.
    valid_detections: An `int` tf.Tensor of shape [1] only the top
      `valid_detections` boxes are valid detections.
    nms_attributes: None or a dict. Each value is a `float` tf.Tensor of shape
      `[max_num_detections, attribute_size]` representing attribute
      predictions for detected boxes. Can be an empty dict if `attributes` is
      None.
  """
  nmsed_boxes = []
  nmsed_scores = []
  nmsed_classes = []
  # When boxes are class-agnostic ([N, 1, 4]) the same box column is reused
  # for every class via the min(...) clamp below.
  num_classes_for_box = boxes.get_shape().as_list()[1]
  num_classes = scores.get_shape().as_list()[1]
  if attributes:
    nmsed_attributes = {att_name: [] for att_name in attributes.keys()}
  else:
    nmsed_attributes = {}

  # NMS is run independently per class; results are merged afterwards.
  for i in range(num_classes):
    boxes_i = boxes[:, min(num_classes_for_box - 1, i)]
    scores_i = scores[:, i]
    # Obtains pre_nms_top_k before running NMS.
    scores_i, indices = tf.nn.top_k(
        scores_i, k=tf.minimum(tf.shape(scores_i)[-1], pre_nms_top_k))
    boxes_i = tf.gather(boxes_i, indices)

    if soft_nms_sigma is not None:
      # Soft-NMS path: returns a variable number of kept indices, so the
      # results are padded/clipped to max_num_detections afterwards.
      (nmsed_indices_i,
       nmsed_scores_i) = tf.image.non_max_suppression_with_scores(
           tf.cast(boxes_i, tf.float32),
           tf.cast(scores_i, tf.float32),
           max_num_detections,
           iou_threshold=nms_iou_threshold,
           score_threshold=pre_nms_score_threshold,
           soft_nms_sigma=soft_nms_sigma,
           name='nms_detections_' + str(i))
      nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
      nmsed_boxes_i = preprocess_ops.clip_or_pad_to_fixed_size(
          nmsed_boxes_i, max_num_detections, 0.0)
      # Padding value -1.0 marks invalid slots (see valid_detections below).
      nmsed_scores_i = preprocess_ops.clip_or_pad_to_fixed_size(
          nmsed_scores_i, max_num_detections, -1.0)
    else:
      # Padded-NMS path: output is already fixed-size; only the first
      # nmsed_num_valid_i entries are real detections.
      (nmsed_indices_i,
       nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
           tf.cast(boxes_i, tf.float32),
           tf.cast(scores_i, tf.float32),
           max_num_detections,
           iou_threshold=nms_iou_threshold,
           score_threshold=pre_nms_score_threshold,
           pad_to_max_output_size=True,
           name='nms_detections_' + str(i))
      nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
      nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
      # Sets scores of invalid boxes to -1.
      nmsed_scores_i = tf.where(
          tf.less(tf.range(max_num_detections), [nmsed_num_valid_i]),
          nmsed_scores_i, -tf.ones_like(nmsed_scores_i))

    nmsed_classes_i = tf.fill([max_num_detections], i)
    nmsed_boxes.append(nmsed_boxes_i)
    nmsed_scores.append(nmsed_scores_i)
    nmsed_classes.append(nmsed_classes_i)
    if attributes:
      # NOTE(review): this reuses `indices` and `nmsed_indices_i` from the
      # statements above — in the padded-NMS branch attributes are gathered
      # with fixed-size indices and then clipped/padded again, which is
      # redundant but harmless; in the soft-NMS branch the pad to
      # max_num_detections is required. Preserve the statement order.
      for att_name, att in attributes.items():
        num_classes_for_attr = att.get_shape().as_list()[1]
        att_i = att[:, min(num_classes_for_attr - 1, i)]
        att_i = tf.gather(att_i, indices)
        nmsed_att_i = tf.gather(att_i, nmsed_indices_i)
        nmsed_att_i = preprocess_ops.clip_or_pad_to_fixed_size(
            nmsed_att_i, max_num_detections, 0.0)
        nmsed_attributes[att_name].append(nmsed_att_i)

  # Concats results from all classes and sort them.
  nmsed_boxes = tf.concat(nmsed_boxes, axis=0)
  nmsed_scores = tf.concat(nmsed_scores, axis=0)
  nmsed_classes = tf.concat(nmsed_classes, axis=0)
  nmsed_scores, indices = tf.nn.top_k(
      nmsed_scores, k=max_num_detections, sorted=True)
  nmsed_boxes = tf.gather(nmsed_boxes, indices)
  nmsed_classes = tf.gather(nmsed_classes, indices)
  # Invalid slots carry score -1 (set above), so counting scores > -1 yields
  # the number of valid detections.
  valid_detections = tf.reduce_sum(
      tf.cast(tf.greater(nmsed_scores, -1), tf.int32))
  if attributes:
    for att_name in attributes.keys():
      nmsed_attributes[att_name] = tf.concat(nmsed_attributes[att_name],
                                             axis=0)
      nmsed_attributes[att_name] = tf.gather(nmsed_attributes[att_name],
                                             indices)

  return (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections,
          nmsed_attributes)
def _select_top_k_scores(scores_in: tf.Tensor, pre_nms_num_detections: int):
  """Selects top_k scores and indices for each class.

  Args:
    scores_in: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which
      stacks class logit outputs on all feature levels. The N is the number of
      total anchors on all levels. The num_classes is the number of classes
      predicted by the model.
    pre_nms_num_detections: Number of candidates before NMS.

  Returns:
    scores and indices: A `tf.Tensor` with shape
      `[batch_size, pre_nms_num_detections, num_classes]`.
  """
  batch_size, num_anchors, num_class = scores_in.get_shape().as_list()
  if batch_size is None:
    batch_size = tf.shape(scores_in)[0]
  # Fold (batch, class) into one leading axis so a single top_k call covers
  # every (image, class) pair at once.
  per_class_scores = tf.reshape(
      tf.transpose(scores_in, perm=[0, 2, 1]), [-1, num_anchors])

  best_scores, best_indices = tf.nn.top_k(
      per_class_scores, k=pre_nms_num_detections, sorted=True)

  best_scores = tf.reshape(best_scores,
                           [batch_size, num_class, pre_nms_num_detections])
  best_indices = tf.reshape(best_indices,
                            [batch_size, num_class, pre_nms_num_detections])
  # Swap back to [batch, pre_nms_num_detections, num_class] layout.
  return (tf.transpose(best_scores, [0, 2, 1]),
          tf.transpose(best_indices, [0, 2, 1]))
def _generate_detections_v2(boxes: tf.Tensor,
                            scores: tf.Tensor,
                            pre_nms_top_k: int = 5000,
                            pre_nms_score_threshold: float = 0.05,
                            nms_iou_threshold: float = 0.5,
                            max_num_detections: int = 100):
  """Generates the final detections given the model outputs.

  This implementation unrolls classes dimension while using the tf.while_loop
  to implement the batched NMS, so that it can be parallelized at the batch
  dimension. It should give better performance comparing to v1 implementation.
  It is TPU compatible.

  Args:
    boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or
      `[batch_size, N, 1, 4]`, which box predictions on all feature levels.
      The N is the number of total anchors on all levels.
    scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which
      stacks class probability on all feature levels. The N is the number of
      total anchors on all levels. The num_classes is the number of classes
      predicted by the model. Note that the class_outputs here is the raw
      score.
    pre_nms_top_k: An `int` number of top candidate detections per class
      before NMS.
    pre_nms_score_threshold: A `float` representing the threshold for deciding
      when to remove boxes based on score.
    nms_iou_threshold: A `float` representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    max_num_detections: A `scalar` representing maximum number of boxes
      retained over all classes.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape [batch_size, max_num_detections, 4]
      representing top detected boxes in [y1, x1, y2, x2].
    nms_scores: A `float` tf.Tensor of shape [batch_size, max_num_detections]
      representing sorted confidence scores for detected boxes. The values are
      between [0, 1].
    nms_classes: An `int` tf.Tensor of shape [batch_size, max_num_detections]
      representing classes for detected boxes.
    valid_detections: An `int` tf.Tensor of shape [batch_size] only the top
      `valid_detections` boxes are valid detections.
  """
  with tf.name_scope('generate_detections'):
    nmsed_boxes = []
    nmsed_classes = []
    nmsed_scores = []
    valid_detections = []
    # When boxes are class-agnostic ([batch, N, 1, 4]) the same box column is
    # reused for every class via the min(...) clamp below.
    batch_size, _, num_classes_for_box, _ = boxes.get_shape().as_list()
    if batch_size is None:
      batch_size = tf.shape(boxes)[0]
    _, total_anchors, num_classes = scores.get_shape().as_list()
    # Selects top pre_nms_num scores and indices before NMS.
    scores, indices = _select_top_k_scores(
        scores, min(total_anchors, pre_nms_top_k))
    # NMS is run per class over the whole batch at once.
    for i in range(num_classes):
      boxes_i = boxes[:, :, min(num_classes_for_box - 1, i), :]
      scores_i = scores[:, :, i]
      # Obtains pre_nms_top_k before running NMS.
      boxes_i = tf.gather(boxes_i, indices[:, :, i], batch_dims=1, axis=1)

      # Filter out scores.
      boxes_i, scores_i = box_ops.filter_boxes_by_scores(
          boxes_i, scores_i, min_score_threshold=pre_nms_score_threshold)

      # Scores were sorted by the top_k above, so the sorted-input NMS
      # variant can be used.
      (nmsed_scores_i, nmsed_boxes_i) = nms.sorted_non_max_suppression_padded(
          tf.cast(scores_i, tf.float32),
          tf.cast(boxes_i, tf.float32),
          max_num_detections,
          iou_threshold=nms_iou_threshold)
      nmsed_classes_i = tf.fill([batch_size, max_num_detections], i)
      nmsed_boxes.append(nmsed_boxes_i)
      nmsed_scores.append(nmsed_scores_i)
      nmsed_classes.append(nmsed_classes_i)
    # Merge per-class results and keep the overall top max_num_detections.
    nmsed_boxes = tf.concat(nmsed_boxes, axis=1)
    nmsed_scores = tf.concat(nmsed_scores, axis=1)
    nmsed_classes = tf.concat(nmsed_classes, axis=1)
    nmsed_scores, indices = tf.nn.top_k(
        nmsed_scores, k=max_num_detections, sorted=True)
    nmsed_boxes = tf.gather(nmsed_boxes, indices, batch_dims=1, axis=1)
    nmsed_classes = tf.gather(nmsed_classes, indices, batch_dims=1)
    # Invalid (filtered/padded) slots carry non-positive scores, so counting
    # scores > 0 yields the number of valid detections per image.
    valid_detections = tf.reduce_sum(
        input_tensor=tf.cast(tf.greater(nmsed_scores, 0.0), tf.int32), axis=1)
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def _generate_detections_batched(boxes: tf.Tensor, scores: tf.Tensor,
                                 pre_nms_score_threshold: float,
                                 nms_iou_threshold: float,
                                 max_num_detections: int):
  """Generates detected boxes with scores and classes for one-stage detector.

  The function takes output of multi-level ConvNets and anchor boxes and
  generates detected boxes. Note that this used batched nms, which is not
  supported on TPU currently.

  Args:
    boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or
      `[batch_size, N, 1, 4]`, which box predictions on all feature levels. The
      N is the number of total anchors on all levels.
    scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which
      stacks class probability on all feature levels. The N is the number of
      total anchors on all levels. The num_classes is the number of classes
      predicted by the model. Note that the class_outputs here is the raw score.
    pre_nms_score_threshold: A `float` representing the threshold for deciding
      when to remove boxes based on score.
    nms_iou_threshold: A `float` representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    max_num_detections: A `scalar` representing maximum number of boxes retained
      over all classes.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape [batch_size, max_num_detections, 4]
      representing top detected boxes in [y1, x1, y2, x2].
    nms_scores: A `float` tf.Tensor of shape [batch_size, max_num_detections]
      representing sorted confidence scores for detected boxes. The values are
      between [0, 1].
    nms_classes: An `int` tf.Tensor of shape [batch_size, max_num_detections]
      representing classes for detected boxes.
    valid_detections: An `int` tf.Tensor of shape [batch_size] only the top
      `valid_detections` boxes are valid detections.
  """
  with tf.name_scope('generate_detections'):
    # Delegate the entire multi-class NMS to the fused TF op. Padding is done
    # per total size (not per class) and boxes are left unclipped, matching
    # the behavior of the other `_generate_detections_*` variants.
    nms_result = tf.image.combined_non_max_suppression(
        boxes,
        scores,
        max_output_size_per_class=max_num_detections,
        max_total_size=max_num_detections,
        iou_threshold=nms_iou_threshold,
        score_threshold=pre_nms_score_threshold,
        pad_per_class=False,
        clip_boxes=False)
    nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = nms_result
    # The fused op returns float class ids; downstream consumers expect ints.
    nmsed_classes = tf.cast(nmsed_classes, tf.int32)
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def
_generate_detections_tflite_implements_signature
(
config
:
Dict
[
str
,
Any
])
->
str
:
"""Returns `experimental_implements` signature for TFLite's custom NMS op.
This signature encodes the arguments to correctly initialize TFLite's custom
post-processing op in the MLIR converter.
For details on `experimental_implements` see here:
https://www.tensorflow.org/api_docs/python/tf/function
Args:
config: A dictionary of configs defining parameters for TFLite NMS op.
Returns:
An `experimental_implements` signature string.
"""
scale_value
=
1.0
implements_signature
=
[
'name: "%s"'
%
'TFLite_Detection_PostProcess'
,
'attr { key: "max_detections" value { i: %d } }'
%
config
[
'max_detections'
],
'attr { key: "max_classes_per_detection" value { i: %d } }'
%
config
[
'max_classes_per_detection'
],
'attr { key: "use_regular_nms" value { b: %s } }'
%
str
(
config
[
'use_regular_nms'
]).
lower
(),
'attr { key: "nms_score_threshold" value { f: %f } }'
%
config
[
'nms_score_threshold'
],
'attr { key: "nms_iou_threshold" value { f: %f } }'
%
config
[
'nms_iou_threshold'
],
'attr { key: "y_scale" value { f: %f } }'
%
scale_value
,
'attr { key: "x_scale" value { f: %f } }'
%
scale_value
,
'attr { key: "h_scale" value { f: %f } }'
%
scale_value
,
'attr { key: "w_scale" value { f: %f } }'
%
scale_value
,
'attr { key: "num_classes" value { i: %d } }'
%
config
[
'num_classes'
]
]
implements_signature
=
' '
.
join
(
implements_signature
)
return
implements_signature
def _generate_detections_tflite(
    raw_boxes: Mapping[str, tf.Tensor],
    raw_scores: Mapping[str, tf.Tensor],
    anchor_boxes: Mapping[str, tf.Tensor],
    config: Dict[str, Any]) -> Sequence[Any]:
  """Generate detections for conversion to TFLite.

  Mathematically same as class-agnostic NMS, except that the last portion of
  the TF graph constitutes a dummy `tf.function` that contains an annotation
  for conversion to TFLite's custom NMS op. Using this custom op allows
  features like post-training quantization & accelerator support.
  NOTE: This function does NOT return a valid output, and is only meant to
  generate a SavedModel for TFLite conversion via MLIR. The generated SavedModel
  should not be used for inference.
  For TFLite op details, see tensorflow/lite/kernels/detection_postprocess.cc

  Args:
    raw_boxes: A dictionary of tensors for raw boxes. Key is level of features
      and value is a tensor denoting a level of boxes with shape [1, H, W, 4 *
      num_anchors].
    raw_scores: A dictionary of tensors for classes. Key is level of features
      and value is a tensor denoting a level of logits with shape [1, H, W,
      num_class * num_anchors].
    anchor_boxes: A dictionary of tensors for anchor boxes. Key is level of
      features and value is a tensor denoting a level of anchors with shape
      [num_anchors, 4].
    config: A dictionary of configs defining parameters for TFLite NMS op.

  Returns:
    A (dummy) tuple of (boxes, scores, classess, num_detections).

  Raises:
    ValueError: If the last dimension of predicted boxes is not divisible by 4,
      or the last dimension of predicted scores is not divisible by number of
      anchors per location.
  """
  scores, boxes, anchors = [], [], []
  levels = list(raw_scores.keys())
  min_level = int(min(levels))
  max_level = int(max(levels))
  batch_size = tf.shape(raw_scores[str(min_level)])[0]

  num_anchors_per_locations_times_4 = raw_boxes[str(
      min_level)].get_shape().as_list()[-1]
  if num_anchors_per_locations_times_4 % 4 != 0:
    raise ValueError(
        'The last dimension of predicted boxes should be divisible by 4.')
  num_anchors_per_locations = num_anchors_per_locations_times_4 // 4

  # Bug fix: the original code re-checked the *boxes* dimension here (a copy of
  # the check above), so a malformed scores tensor was never caught. Validate
  # the scores' channel dimension against the anchor count instead.
  scores_channels = raw_scores[str(min_level)].get_shape().as_list()[-1]
  if scores_channels % num_anchors_per_locations != 0:
    raise ValueError('The last dimension of predicted scores should be '
                     f'divisible by {num_anchors_per_locations}.')
  num_classes = scores_channels // num_anchors_per_locations
  # The custom op needs the class count; record it in the shared config.
  config.update({'num_classes': num_classes})

  for i in range(min_level, max_level + 1):
    # Flatten each level to [batch, num_boxes, ...]; scores get a sigmoid
    # since the raw outputs are logits.
    scores.append(
        tf.sigmoid(
            tf.reshape(raw_scores[str(i)], [batch_size, -1, num_classes])))
    boxes.append(tf.reshape(raw_boxes[str(i)], [batch_size, -1, 4]))
    anchors.append(tf.reshape(anchor_boxes[str(i)], [-1, 4]))
  scores = tf.concat(scores, 1)
  boxes = tf.concat(boxes, 1)
  anchors = tf.concat(anchors, 0)

  # Convert anchors from [ymin, xmin, ymax, xmax] corners to the
  # [ycenter, xcenter, height, width] form expected by the TFLite op.
  ycenter_a = (anchors[..., 0] + anchors[..., 2]) / 2
  xcenter_a = (anchors[..., 1] + anchors[..., 3]) / 2
  ha = anchors[..., 2] - anchors[..., 0]
  wa = anchors[..., 3] - anchors[..., 1]
  anchors = tf.stack([ycenter_a, xcenter_a, ha, wa], axis=-1)

  # There is no TF equivalent for TFLite's custom post-processing op.
  # So we add an 'empty' composite function here, that is legalized to the
  # custom op with MLIR.
  # For details, see: tensorflow/compiler/mlir/lite/utils/nms_utils.cc
  @tf.function(
      experimental_implements=_generate_detections_tflite_implements_signature(
          config))
  # pylint: disable=g-unused-argument,unused-argument
  def dummy_post_processing(input_boxes, input_scores, input_anchors):
    boxes = tf.constant(0.0, dtype=tf.float32, name='boxes')
    scores = tf.constant(0.0, dtype=tf.float32, name='scores')
    classes = tf.constant(0.0, dtype=tf.float32, name='classes')
    num_detections = tf.constant(0.0, dtype=tf.float32, name='num_detections')
    return boxes, classes, scores, num_detections

  # Reversed so callers receive (num_detections, scores, classes, boxes)
  # relative to the dummy function's return order.
  return dummy_post_processing(boxes, scores, anchors)[::-1]
@tf.keras.utils.register_keras_serializable(package='Vision')
class DetectionGenerator(tf.keras.layers.Layer):
  """Generates the final detected boxes with scores and classes."""

  def __init__(self,
               apply_nms: bool = True,
               pre_nms_top_k: int = 5000,
               pre_nms_score_threshold: float = 0.05,
               nms_iou_threshold: float = 0.5,
               max_num_detections: int = 100,
               nms_version: str = 'v2',
               use_cpu_nms: bool = False,
               soft_nms_sigma: Optional[float] = None,
               **kwargs):
    """Initializes a detection generator.

    Args:
      apply_nms: A `bool` of whether or not apply non maximum suppression.
        If False, the decoded boxes and their scores are returned.
      pre_nms_top_k: An `int` of the number of top scores proposals to be kept
        before applying NMS.
      pre_nms_score_threshold: A `float` of the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are
        thrown away.
      nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
      max_num_detections: An `int` of the final number of total detections to
        generate.
      nms_version: A string of `batched`, `v1` or `v2` specifies NMS version.
      use_cpu_nms: A `bool` of whether or not enforce NMS to run on CPU.
      soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
        When soft_nms_sigma=0.0, we fall back to standard NMS.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    # All constructor arguments are kept in a single dict so that
    # `get_config`/`from_config` serialization is trivial.
    self._config_dict = {
        'apply_nms': apply_nms,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
        'soft_nms_sigma': soft_nms_sigma,
    }
    super(DetectionGenerator, self).__init__(**kwargs)

  def __call__(self,
               raw_boxes: tf.Tensor,
               raw_scores: tf.Tensor,
               anchor_boxes: tf.Tensor,
               image_shape: tf.Tensor,
               regression_weights: Optional[List[float]] = None,
               bbox_per_class: bool = True):
    """Generates final detections.

    Args:
      raw_boxes: A `tf.Tensor` of shape of `[batch_size, K, num_classes * 4]`
        representing the class-specific box coordinates relative to anchors.
      raw_scores: A `tf.Tensor` of shape of `[batch_size, K, num_classes]`
        representing the class logits before applying score activiation.
      anchor_boxes: A `tf.Tensor` of shape of `[batch_size, K, 4]` representing
        the corresponding anchor boxes w.r.t `box_outputs`.
      image_shape: A `tf.Tensor` of shape of `[batch_size, 2]` storing the image
        height and width w.r.t. the scaled image, i.e. the same image space as
        `box_outputs` and `anchor_boxes`.
      regression_weights: A list of four float numbers to scale coordinates.
      bbox_per_class: A `bool`. If True, perform per-class box regression.

    Returns:
      If `apply_nms` = True, the return is a dictionary with keys:
        `detection_boxes`: A `float` tf.Tensor of shape
          [batch, max_num_detections, 4] representing top detected boxes in
          [y1, x1, y2, x2].
        `detection_scores`: A `float` `tf.Tensor` of shape
          [batch, max_num_detections] representing sorted confidence scores for
          detected boxes. The values are between [0, 1].
        `detection_classes`: An `int` tf.Tensor of shape
          [batch, max_num_detections] representing classes for detected boxes.
        `num_detections`: An `int` tf.Tensor of shape [batch] only the first
          `num_detections` boxes are valid detections
      If `apply_nms` = False, the return is a dictionary with keys:
        `decoded_boxes`: A `float` tf.Tensor of shape [batch, num_raw_boxes, 4]
          representing all the decoded boxes.
        `decoded_box_scores`: A `float` tf.Tensor of shape
          [batch, num_raw_boxes] representing socres of all the decoded boxes.
    """
    # Raw scores are logits over classes (index 0 = background).
    box_scores = tf.nn.softmax(raw_scores, axis=-1)

    # Removes the background class.
    box_scores_shape = tf.shape(box_scores)
    box_scores_shape_list = box_scores.get_shape().as_list()
    batch_size = box_scores_shape[0]
    num_locations = box_scores_shape_list[1]
    num_classes = box_scores_shape_list[-1]

    # Drop class 0 (background) from the score tensor.
    box_scores = tf.slice(box_scores, [0, 0, 1], [-1, -1, -1])

    if bbox_per_class:
      # Per-class regression: expand boxes to [batch, K, num_classes, 4],
      # drop the background class, and tile anchors to match before
      # flattening both to [batch, K * (num_classes - 1), 4].
      num_detections = num_locations * (num_classes - 1)
      raw_boxes = tf.reshape(raw_boxes,
                             [batch_size, num_locations, num_classes, 4])
      raw_boxes = tf.slice(raw_boxes, [0, 0, 1, 0], [-1, -1, -1, -1])
      anchor_boxes = tf.tile(
          tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
      raw_boxes = tf.reshape(raw_boxes, [batch_size, num_detections, 4])
      anchor_boxes = tf.reshape(anchor_boxes, [batch_size, num_detections, 4])

    # Box decoding.
    decoded_boxes = box_ops.decode_boxes(
        raw_boxes, anchor_boxes, weights=regression_weights)

    # Box clipping
    decoded_boxes = box_ops.clip_boxes(
        decoded_boxes, tf.expand_dims(image_shape, axis=1))

    if bbox_per_class:
      decoded_boxes = tf.reshape(
          decoded_boxes, [batch_size, num_locations, num_classes - 1, 4])
    else:
      # Class-agnostic boxes: insert a singleton class axis so the NMS
      # implementations see a uniform [batch, K, num_classes_for_box, 4].
      decoded_boxes = tf.expand_dims(decoded_boxes, axis=2)

    if not self._config_dict['apply_nms']:
      return {
          'decoded_boxes': decoded_boxes,
          'decoded_box_scores': box_scores,
      }

    # Optionally force the NMS be run on CPU.
    if self._config_dict['use_cpu_nms']:
      nms_context = tf.device('cpu:0')
    else:
      nms_context = contextlib.nullcontext()

    with nms_context:
      # Dispatch to the requested NMS implementation; all variants return
      # (boxes, scores, classes, valid_detections), v1 additionally returns
      # attributes which are unused here.
      if self._config_dict['nms_version'] == 'batched':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_batched(
                decoded_boxes, box_scores,
                self._config_dict['pre_nms_score_threshold'],
                self._config_dict['nms_iou_threshold'],
                self._config_dict['max_num_detections']))
      elif self._config_dict['nms_version'] == 'v1':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections, _) = (
            _generate_detections_v1(
                decoded_boxes,
                box_scores,
                pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                pre_nms_score_threshold=self
                ._config_dict['pre_nms_score_threshold'],
                nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                max_num_detections=self._config_dict['max_num_detections'],
                soft_nms_sigma=self._config_dict['soft_nms_sigma']))
      elif self._config_dict['nms_version'] == 'v2':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_v2(
                decoded_boxes,
                box_scores,
                pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                pre_nms_score_threshold=self
                ._config_dict['pre_nms_score_threshold'],
                nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                max_num_detections=self._config_dict['max_num_detections']))
      else:
        raise ValueError('NMS version {} not supported.'.format(
            self._config_dict['nms_version']))

    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return {
        'num_detections': valid_detections,
        'detection_boxes': nmsed_boxes,
        'detection_classes': nmsed_classes,
        'detection_scores': nmsed_scores,
    }

  def get_config(self):
    """Returns the layer config for Keras serialization."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Recreates the layer from a config produced by `get_config`."""
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class MultilevelDetectionGenerator(tf.keras.layers.Layer):
  """Generates detected boxes with scores and classes for one-stage detector."""

  def __init__(self,
               apply_nms: bool = True,
               pre_nms_top_k: int = 5000,
               pre_nms_score_threshold: float = 0.05,
               nms_iou_threshold: float = 0.5,
               max_num_detections: int = 100,
               nms_version: str = 'v1',
               use_cpu_nms: bool = False,
               soft_nms_sigma: Optional[float] = None,
               tflite_post_processing_config: Optional[Dict[str, Any]] = None,
               **kwargs):
    """Initializes a multi-level detection generator.

    Args:
      apply_nms: A `bool` of whether or not apply non maximum suppression. If
        False, the decoded boxes and their scores are returned.
      pre_nms_top_k: An `int` of the number of top scores proposals to be kept
        before applying NMS.
      pre_nms_score_threshold: A `float` of the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are thrown
        away.
      nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
      max_num_detections: An `int` of the final number of total detections to
        generate.
      nms_version: A string of `batched`, `v1` or `v2` specifies NMS version
      use_cpu_nms: A `bool` of whether or not enforce NMS to run on CPU.
      soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
        When soft_nms_sigma=0.0, we fall back to standard NMS.
      tflite_post_processing_config: An optional dictionary containing
        post-processing parameters used for TFLite custom NMS op.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    # Constructor arguments are kept in one dict for trivial serialization
    # via `get_config`/`from_config`.
    self._config_dict = {
        'apply_nms': apply_nms,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
        'soft_nms_sigma': soft_nms_sigma
    }
    # Only recorded when provided, so configs without TFLite settings
    # round-trip unchanged.
    if tflite_post_processing_config is not None:
      self._config_dict.update(
          {'tflite_post_processing_config': tflite_post_processing_config})
    super(MultilevelDetectionGenerator, self).__init__(**kwargs)

  def _decode_multilevel_outputs(
      self,
      raw_boxes: Mapping[str, tf.Tensor],
      raw_scores: Mapping[str, tf.Tensor],
      anchor_boxes: Mapping[str, tf.Tensor],
      image_shape: tf.Tensor,
      raw_attributes: Optional[Mapping[str, tf.Tensor]] = None):
    """Collects dict of multilevel boxes, scores, attributes into lists."""
    boxes = []
    scores = []
    if raw_attributes:
      attributes = {att_name: [] for att_name in raw_attributes.keys()}
    else:
      attributes = {}

    # Level keys are stringified integers; derive the inclusive level range.
    levels = list(raw_boxes.keys())
    min_level = int(min(levels))
    max_level = int(max(levels))
    for i in range(min_level, max_level + 1):
      raw_boxes_i = raw_boxes[str(i)]
      raw_scores_i = raw_scores[str(i)]
      batch_size = tf.shape(raw_boxes_i)[0]
      (_, feature_h_i, feature_w_i,
       num_anchors_per_locations_times_4) = raw_boxes_i.get_shape().as_list()
      num_locations = feature_h_i * feature_w_i
      num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
      # Class count is inferred from the score channels per anchor.
      num_classes = raw_scores_i.get_shape().as_list(
      )[-1] // num_anchors_per_locations

      # Applies score transformation and remove the implicit background class.
      scores_i = tf.sigmoid(
          tf.reshape(raw_scores_i, [
              batch_size, num_locations * num_anchors_per_locations, num_classes
          ]))
      scores_i = tf.slice(scores_i, [0, 0, 1], [-1, -1, -1])

      # Box decoding.
      # The anchor boxes are shared for all data in a batch.
      # One stage detector only supports class agnostic box regression.
      anchor_boxes_i = tf.reshape(
          anchor_boxes[str(i)],
          [batch_size, num_locations * num_anchors_per_locations, 4])
      raw_boxes_i = tf.reshape(
          raw_boxes_i,
          [batch_size, num_locations * num_anchors_per_locations, 4])
      boxes_i = box_ops.decode_boxes(raw_boxes_i, anchor_boxes_i)

      # Box clipping.
      boxes_i = box_ops.clip_boxes(
          boxes_i, tf.expand_dims(image_shape, axis=1))

      boxes.append(boxes_i)
      scores.append(scores_i)

      if raw_attributes:
        # Flatten each attribute head the same way as boxes/scores so they
        # stay index-aligned with the detections.
        for att_name, raw_att in raw_attributes.items():
          attribute_size = raw_att[str(
              i)].get_shape().as_list()[-1] // num_anchors_per_locations
          att_i = tf.reshape(raw_att[str(i)], [
              batch_size, num_locations * num_anchors_per_locations,
              attribute_size
          ])
          attributes[att_name].append(att_i)

    # Concatenate across levels; a singleton class axis is inserted since
    # one-stage box regression is class-agnostic.
    boxes = tf.concat(boxes, axis=1)
    boxes = tf.expand_dims(boxes, axis=2)
    scores = tf.concat(scores, axis=1)

    if raw_attributes:
      for att_name in raw_attributes.keys():
        attributes[att_name] = tf.concat(attributes[att_name], axis=1)
        attributes[att_name] = tf.expand_dims(attributes[att_name], axis=2)

    return boxes, scores, attributes

  def __call__(self,
               raw_boxes: Mapping[str, tf.Tensor],
               raw_scores: Mapping[str, tf.Tensor],
               anchor_boxes: Mapping[str, tf.Tensor],
               image_shape: tf.Tensor,
               raw_attributes: Optional[Mapping[str, tf.Tensor]] = None):
    """Generates final detections.

    Args:
      raw_boxes: A `dict` with keys representing FPN levels and values
        representing box tenors of shape `[batch, feature_h, feature_w,
        num_anchors * 4]`.
      raw_scores: A `dict` with keys representing FPN levels and values
        representing logit tensors of shape `[batch, feature_h, feature_w,
        num_anchors]`.
      anchor_boxes: A `dict` with keys representing FPN levels and values
        representing anchor tenors of shape `[batch_size, K, 4]` representing
        the corresponding anchor boxes w.r.t `box_outputs`.
      image_shape: A `tf.Tensor` of shape of [batch_size, 2] storing the image
        height and width w.r.t. the scaled image, i.e. the same image space as
        `box_outputs` and `anchor_boxes`.
      raw_attributes: If not None, a `dict` of (attribute_name,
        attribute_prediction) pairs. `attribute_prediction` is a dict that
        contains keys representing FPN levels and values representing tenors of
        shape `[batch, feature_h, feature_w, num_anchors * attribute_size]`.

    Returns:
      If `apply_nms` = True, the return is a dictionary with keys:
        `detection_boxes`: A `float` tf.Tensor of shape
          [batch, max_num_detections, 4] representing top detected boxes in
          [y1, x1, y2, x2].
        `detection_scores`: A `float` tf.Tensor of shape
          [batch, max_num_detections] representing sorted confidence scores for
          detected boxes. The values are between [0, 1].
        `detection_classes`: An `int` tf.Tensor of shape
          [batch, max_num_detections] representing classes for detected boxes.
        `num_detections`: An `int` tf.Tensor of shape [batch] only the first
          `num_detections` boxes are valid detections
        `detection_attributes`: A dict. Values of the dict is a `float`
          tf.Tensor of shape [batch, max_num_detections, attribute_size]
          representing attribute predictions for detected boxes.
      If `apply_nms` = False, the return is a dictionary with keys:
        `decoded_boxes`: A `float` tf.Tensor of shape [batch, num_raw_boxes, 4]
          representing all the decoded boxes.
        `decoded_box_scores`: A `float` tf.Tensor of shape
          [batch, num_raw_boxes] representing socres of all the decoded boxes.
        `decoded_box_attributes`: A dict. Values in the dict is a
          `float` tf.Tensor of shape [batch, num_raw_boxes, attribute_size]
          representing attribute predictions of all the decoded boxes.
    """
    # The TFLite path bypasses decoding entirely: it builds a dummy graph
    # that the MLIR converter legalizes to the custom NMS op. Its output is
    # NOT a valid inference result (see _generate_detections_tflite).
    if self._config_dict['apply_nms'] and self._config_dict[
        'nms_version'] == 'tflite':
      boxes, classes, scores, num_detections = _generate_detections_tflite(
          raw_boxes, raw_scores, anchor_boxes,
          self.get_config()['tflite_post_processing_config'])
      return {
          'num_detections': num_detections,
          'detection_boxes': boxes,
          'detection_classes': classes,
          'detection_scores': scores
      }

    boxes, scores, attributes = self._decode_multilevel_outputs(
        raw_boxes, raw_scores, anchor_boxes, image_shape, raw_attributes)

    if not self._config_dict['apply_nms']:
      return {
          'decoded_boxes': boxes,
          'decoded_box_scores': scores,
          'decoded_box_attributes': attributes,
      }

    # Optionally force the NMS to run on CPU.
    if self._config_dict['use_cpu_nms']:
      nms_context = tf.device('cpu:0')
    else:
      nms_context = contextlib.nullcontext()

    with nms_context:
      # Only the v1 implementation threads attributes through NMS.
      if raw_attributes and (self._config_dict['nms_version'] != 'v1'):
        raise ValueError(
            'Attribute learning is only supported for NMSv1 but NMS {} is used.'
            .format(self._config_dict['nms_version']))
      if self._config_dict['nms_version'] == 'batched':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_batched(
                boxes, scores, self._config_dict['pre_nms_score_threshold'],
                self._config_dict['nms_iou_threshold'],
                self._config_dict['max_num_detections']))
        # Set `nmsed_attributes` to None for batched NMS.
        nmsed_attributes = {}
      elif self._config_dict['nms_version'] == 'v1':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections,
         nmsed_attributes) = (
             _generate_detections_v1(
                 boxes,
                 scores,
                 attributes=attributes if raw_attributes else None,
                 pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                 pre_nms_score_threshold=self
                 ._config_dict['pre_nms_score_threshold'],
                 nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                 max_num_detections=self._config_dict['max_num_detections'],
                 soft_nms_sigma=self._config_dict['soft_nms_sigma']))
      elif self._config_dict['nms_version'] == 'v2':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_v2(
                boxes,
                scores,
                pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                pre_nms_score_threshold=self
                ._config_dict['pre_nms_score_threshold'],
                nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                max_num_detections=self._config_dict['max_num_detections']))
        # Set `nmsed_attributes` to None for v2.
        nmsed_attributes = {}
      else:
        raise ValueError('NMS version {} not supported.'.format(
            self._config_dict['nms_version']))

    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return {
        'num_detections': valid_detections,
        'detection_boxes': nmsed_boxes,
        'detection_classes': nmsed_classes,
        'detection_scores': nmsed_scores,
        'detection_attributes': nmsed_attributes,
    }

  def get_config(self):
    """Returns the layer config for Keras serialization."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Recreates the layer from a config produced by `get_config`."""
    return cls(**config)
Prev
1
…
4
5
6
7
8
9
10
11
12
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment