Commit 8a9a607c, authored Jan 31, 2022 by Yeqing Li; committed by A. Unique TensorFlower, Jan 31, 2022

Internal change

PiperOrigin-RevId: 425419954
parent b7bb52f0

Changes: 74 — showing 20 changed files with 3793 additions and 0 deletions (+3793, -0)
official/vision/modeling/backbones/spinenet_mobile.py            +539  -0
official/vision/modeling/backbones/spinenet_mobile_test.py       +112  -0
official/vision/modeling/backbones/spinenet_test.py              +128  -0
official/vision/modeling/classification_model.py                 +122  -0
official/vision/modeling/classification_model_test.py            +184  -0
official/vision/modeling/decoders/__init__.py                    +20   -0
official/vision/modeling/decoders/aspp.py                        +203  -0
official/vision/modeling/decoders/aspp_test.py                   +94   -0
official/vision/modeling/decoders/factory.py                     +135  -0
official/vision/modeling/decoders/factory_test.py                +159  -0
official/vision/modeling/decoders/fpn.py                         +246  -0
official/vision/modeling/decoders/fpn_test.py                    +117  -0
official/vision/modeling/decoders/nasfpn.py                      +368  -0
official/vision/modeling/decoders/nasfpn_test.py                 +59   -0
official/vision/modeling/factory.py                              +385  -0
official/vision/modeling/factory_3d.py                           +103  -0
official/vision/modeling/factory_test.py                         +132  -0
official/vision/modeling/heads/__init__.py                       +22   -0
official/vision/modeling/heads/dense_prediction_heads.py         +517  -0
official/vision/modeling/heads/dense_prediction_heads_test.py    +148  -0
official/vision/modeling/backbones/spinenet_mobile.py  0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
# ==============================================================================
"""Contains definitions of Mobile SpineNet Networks."""
import math
from typing import Any, List, Optional, Tuple

# Import libraries
from absl import logging
import tensorflow as tf

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.backbones import factory
from official.vision.modeling.layers import nn_blocks
from official.vision.modeling.layers import nn_layers
from official.vision.ops import spatial_transform_ops

layers = tf.keras.layers

FILTER_SIZE_MAP = {
    0: 8,
    1: 16,
    2: 24,
    3: 40,
    4: 80,
    5: 112,
    6: 112,
    7: 112,
}

# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
#   (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
    (2, 'mbconv', (0, 1), False),
    (2, 'mbconv', (1, 2), False),
    (4, 'mbconv', (1, 2), False),
    (3, 'mbconv', (3, 4), False),
    (4, 'mbconv', (3, 5), False),
    (6, 'mbconv', (4, 6), False),
    (4, 'mbconv', (4, 6), False),
    (5, 'mbconv', (7, 8), False),
    (7, 'mbconv', (7, 9), False),
    (5, 'mbconv', (9, 10), False),
    (5, 'mbconv', (9, 11), False),
    (4, 'mbconv', (6, 11), True),
    (3, 'mbconv', (5, 11), True),
    (5, 'mbconv', (8, 13), True),
    (7, 'mbconv', (6, 15), True),
    (6, 'mbconv', (13, 15), True),
]

SCALING_MAP = {
    '49': {
        'endpoints_num_filters': 48,
        'filter_size_scale': 1.0,
        'block_repeats': 1,
    },
    '49S': {
        'endpoints_num_filters': 40,
        'filter_size_scale': 0.65,
        'block_repeats': 1,
    },
    '49XS': {
        'endpoints_num_filters': 24,
        'filter_size_scale': 0.6,
        'block_repeats': 1,
    },
}


class BlockSpec(object):
  """A container class that specifies the block configuration for SpineNet."""

  def __init__(self, level: int, block_fn: str,
               input_offsets: Tuple[int, int], is_output: bool):
    self.level = level
    self.block_fn = block_fn
    self.input_offsets = input_offsets
    self.is_output = is_output


def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Builds the list of BlockSpec objects for SpineNet."""
  if not block_specs:
    block_specs = SPINENET_BLOCK_SPECS
  logging.info('Building SpineNet block specs: %s', block_specs)
  return [BlockSpec(*b) for b in block_specs]


@tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNetMobile(tf.keras.Model):
  """Creates a Mobile SpineNet family model.

  This implements:
    [1] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan,
      Yin Cui, Quoc V. Le, Xiaodan Song.
      SpineNet: Learning Scale-Permuted Backbone for Recognition and
      Localization. (https://arxiv.org/abs/1912.05027)
    [2] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Yin Cui, Mingxing Tan,
      Quoc Le, Xiaodan Song.
      Efficient Scale-Permuted Backbone with Learned Resource Distribution.
      (https://arxiv.org/abs/2010.11426)
  """

  def __init__(
      self,
      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
          shape=[None, None, None, 3]),
      min_level: int = 3,
      max_level: int = 7,
      block_specs: List[BlockSpec] = build_block_specs(),
      endpoints_num_filters: int = 256,
      se_ratio: float = 0.2,
      block_repeats: int = 1,
      filter_size_scale: float = 1.0,
      expand_ratio: int = 6,
      init_stochastic_depth_rate=0.0,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      use_keras_upsampling_2d: bool = False,
      **kwargs):
    """Initializes a Mobile SpineNet model.

    Args:
      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
      min_level: An `int` of min level for output multiscale features.
      max_level: An `int` of max level for output multiscale features.
      block_specs: The block specifications for the SpineNet model discovered
        by NAS.
      endpoints_num_filters: An `int` of feature dimension for the output
        endpoints.
      se_ratio: A `float` of Squeeze-and-Excitation ratio.
      block_repeats: An `int` of number of blocks contained in the layer.
      filter_size_scale: A `float` of multiplier for the filters (number of
        channels) for all convolution ops. The value must be greater than
        zero. Typical usage will be to set this value in (0, 1) to reduce the
        number of parameters or computation cost of the model.
      expand_ratio: An `int` of expansion ratio for inverted bottleneck
        blocks.
      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
      kernel_initializer: A `str` for kernel initializer of convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      activation: A `str` name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A small `float` added to variance to avoid dividing by
        zero.
      use_keras_upsampling_2d: If True, use keras UpSampling2D layer.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._input_specs = input_specs
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._endpoints_num_filters = endpoints_num_filters
    self._se_ratio = se_ratio
    self._block_repeats = block_repeats
    self._filter_size_scale = filter_size_scale
    self._expand_ratio = expand_ratio
    self._init_stochastic_depth_rate = init_stochastic_depth_rate
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._use_keras_upsampling_2d = use_keras_upsampling_2d
    self._num_init_blocks = 2
    if use_sync_bn:
      self._norm = layers.experimental.SyncBatchNormalization
    else:
      self._norm = layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1

    # Build SpineNet.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])

    net = self._build_stem(inputs=inputs)
    input_width = input_specs.shape[2]
    if input_width is None:
      max_stride = max(map(lambda b: b.level, block_specs))
      input_width = 2 ** max_stride
    net = self._build_scale_permuted_network(net=net, input_width=input_width)
    endpoints = self._build_endpoints(net=net)

    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

    super().__init__(inputs=inputs, outputs=endpoints)

  def _block_group(self,
                   inputs: tf.Tensor,
                   in_filters: int,
                   out_filters: int,
                   strides: int,
                   expand_ratio: int = 6,
                   block_repeats: int = 1,
                   se_ratio: float = 0.2,
                   stochastic_depth_drop_rate: Optional[float] = None,
                   name: str = 'block_group'):
    """Creates one group of blocks for the SpineNet model."""
    x = nn_blocks.InvertedBottleneckBlock(
        in_filters=in_filters,
        out_filters=out_filters,
        strides=strides,
        se_ratio=se_ratio,
        expand_ratio=expand_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon)(inputs)
    for _ in range(1, block_repeats):
      x = nn_blocks.InvertedBottleneckBlock(
          in_filters=in_filters,
          out_filters=out_filters,
          strides=1,
          se_ratio=se_ratio,
          expand_ratio=expand_ratio,
          stochastic_depth_drop_rate=stochastic_depth_drop_rate,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._activation,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon)(inputs)
    return tf.keras.layers.Activation('linear', name=name)(x)

  def _build_stem(self, inputs):
    """Builds SpineNet stem."""
    x = layers.Conv2D(
        filters=int(FILTER_SIZE_MAP[0] * self._filter_size_scale),
        kernel_size=3,
        strides=2,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(inputs)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(x)
    x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)

    net = []
    stem_strides = [1, 2]
    # Build the initial level 2 blocks.
    for i in range(self._num_init_blocks):
      x = self._block_group(
          inputs=x,
          in_filters=int(FILTER_SIZE_MAP[i] * self._filter_size_scale),
          out_filters=int(FILTER_SIZE_MAP[i + 1] * self._filter_size_scale),
          expand_ratio=self._expand_ratio,
          strides=stem_strides[i],
          se_ratio=self._se_ratio,
          block_repeats=self._block_repeats,
          name='stem_block_{}'.format(i + 1))
      net.append(x)
    return net

  def _build_scale_permuted_network(self, net, input_width,
                                    weighted_fusion=False):
    """Builds scale-permuted network."""
    net_sizes = [
        int(math.ceil(input_width / 2)),
        int(math.ceil(input_width / 2 ** 2))
    ]
    num_outgoing_connections = [0] * len(net)

    endpoints = {}
    for i, block_spec in enumerate(self._block_specs):
      # Update block level if it is larger than max_level to avoid building
      # blocks smaller than requested.
      block_spec.level = min(block_spec.level, self._max_level)
      # Find out specs for the target block.
      target_width = int(math.ceil(input_width / 2 ** block_spec.level))
      target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
                               self._filter_size_scale)

      # Resample then merge input0 and input1.
      parents = []
      input0 = block_spec.input_offsets[0]
      input1 = block_spec.input_offsets[1]

      x0 = self._resample_with_sepconv(
          inputs=net[input0],
          input_width=net_sizes[input0],
          target_width=target_width,
          target_num_filters=target_num_filters)
      parents.append(x0)
      num_outgoing_connections[input0] += 1

      x1 = self._resample_with_sepconv(
          inputs=net[input1],
          input_width=net_sizes[input1],
          target_width=target_width,
          target_num_filters=target_num_filters)
      parents.append(x1)
      num_outgoing_connections[input1] += 1

      # Merge 0 outdegree blocks to the output block.
      if block_spec.is_output:
        for j, (j_feat, j_connections) in enumerate(
            zip(net, num_outgoing_connections)):
          if j_connections == 0 and (j_feat.shape[2] == target_width and
                                     j_feat.shape[3] == x0.shape[3]):
            parents.append(j_feat)
            num_outgoing_connections[j] += 1

      # pylint: disable=g-direct-tensorflow-import
      if weighted_fusion:
        dtype = parents[0].dtype
        parent_weights = [
            tf.nn.relu(
                tf.cast(
                    tf.Variable(1.0, name='block{}_fusion{}'.format(i, j)),
                    dtype=dtype)) for j in range(len(parents))
        ]
        weights_sum = layers.Add()(parent_weights)
        parents = [
            parents[i] * parent_weights[i] / (weights_sum + 0.0001)
            for i in range(len(parents))
        ]

      # Fuse all parent nodes then build a new block.
      x = tf_utils.get_activation(
          self._activation, use_keras_layer=True)(layers.Add()(parents))
      x = self._block_group(
          inputs=x,
          in_filters=target_num_filters,
          out_filters=target_num_filters,
          strides=1,
          se_ratio=self._se_ratio,
          expand_ratio=self._expand_ratio,
          block_repeats=self._block_repeats,
          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
              self._init_stochastic_depth_rate, i + 1,
              len(self._block_specs)),
          name='scale_permuted_block_{}'.format(i + 1))

      net.append(x)
      net_sizes.append(target_width)
      num_outgoing_connections.append(0)

      # Save output feats.
      if block_spec.is_output:
        if block_spec.level in endpoints:
          raise ValueError(
              'Duplicate feats found for output level {}.'.format(
                  block_spec.level))
        if (block_spec.level < self._min_level or
            block_spec.level > self._max_level):
          logging.warning(
              'SpineNet output level out of range [min_level, max_level] = '
              '[%s, %s] will not be used for further processing.',
              self._min_level, self._max_level)
        endpoints[str(block_spec.level)] = x

    return endpoints

  def _build_endpoints(self, net):
    """Matches filter size for endpoints before sharing conv layers."""
    endpoints = {}
    for level in range(self._min_level, self._max_level + 1):
      x = layers.Conv2D(
          filters=self._endpoints_num_filters,
          kernel_size=1,
          strides=1,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(net[str(level)])
      x = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)(x)
      x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
      endpoints[str(level)] = x
    return endpoints

  def _resample_with_sepconv(self, inputs, input_width, target_width,
                             target_num_filters):
    """Matches resolution and feature dimension."""
    x = inputs
    # Spatial resampling.
    if input_width > target_width:
      while input_width > target_width:
        x = layers.DepthwiseConv2D(
            kernel_size=3,
            strides=2,
            padding='SAME',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer)(x)
        x = self._norm(
            axis=self._bn_axis,
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon)(x)
        x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
        input_width /= 2
    elif input_width < target_width:
      scale = target_width // input_width
      x = spatial_transform_ops.nearest_upsampling(
          x, scale=scale, use_keras_layer=self._use_keras_upsampling_2d)

    # Last 1x1 conv to match filter size.
    x = layers.Conv2D(
        filters=target_num_filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(x)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(x)
    return x

  def get_config(self):
    config_dict = {
        'min_level': self._min_level,
        'max_level': self._max_level,
        'endpoints_num_filters': self._endpoints_num_filters,
        'se_ratio': self._se_ratio,
        'expand_ratio': self._expand_ratio,
        'block_repeats': self._block_repeats,
        'filter_size_scale': self._filter_size_scale,
        'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'use_keras_upsampling_2d': self._use_keras_upsampling_2d,
    }
    return config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self):
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs


@factory.register_backbone_builder('spinenet_mobile')
def build_spinenet_mobile(
    input_specs: tf.keras.layers.InputSpec,
    backbone_config: hyperparams.Config,
    norm_activation_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:
  """Builds Mobile SpineNet backbone from a config."""
  backbone_type = backbone_config.type
  backbone_cfg = backbone_config.get()
  assert backbone_type == 'spinenet_mobile', (f'Inconsistent backbone type '
                                              f'{backbone_type}')

  model_id = backbone_cfg.model_id
  if model_id not in SCALING_MAP:
    raise ValueError(
        'Mobile SpineNet-{} is not a valid architecture.'.format(model_id))
  scaling_params = SCALING_MAP[model_id]

  return SpineNetMobile(
      input_specs=input_specs,
      min_level=backbone_cfg.min_level,
      max_level=backbone_cfg.max_level,
      endpoints_num_filters=scaling_params['endpoints_num_filters'],
      block_repeats=scaling_params['block_repeats'],
      filter_size_scale=scaling_params['filter_size_scale'],
      se_ratio=backbone_cfg.se_ratio,
      expand_ratio=backbone_cfg.expand_ratio,
      init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
      kernel_regularizer=l2_regularizer,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      use_keras_upsampling_2d=backbone_cfg.use_keras_upsampling_2d)
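For orientation, here is a minimal usage sketch (not part of the commit) of the backbone defined above. The '49S' profile is taken directly from SCALING_MAP; the 384x384 input size is an arbitrary example.

# A minimal sketch, assuming the package layout used in the file above.
import tensorflow as tf
from official.vision.modeling.backbones import spinenet_mobile

params = spinenet_mobile.SCALING_MAP['49S']
backbone = spinenet_mobile.SpineNetMobile(
    input_specs=tf.keras.layers.InputSpec(shape=[None, 384, 384, 3]),
    min_level=3,
    max_level=7,
    endpoints_num_filters=params['endpoints_num_filters'],  # 40
    filter_size_scale=params['filter_size_scale'],  # 0.65
    block_repeats=params['block_repeats'])  # 1

# The backbone returns a dict of multiscale endpoints keyed by level strings;
# each endpoint carries `endpoints_num_filters` channels after the final 1x1
# projection in _build_endpoints.
features = backbone(tf.zeros([1, 384, 384, 3]))
for level, feat in features.items():
  print(level, feat.shape)  # e.g. '3' -> (1, 48, 48, 40)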
official/vision/modeling/backbones/spinenet_mobile_test.py  0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
# ==============================================================================
"""Tests for SpineNet."""

# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import spinenet_mobile


class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (128, 0.6, 1, 0.0, 24),
      (128, 0.65, 1, 0.2, 40),
      (256, 1.0, 1, 0.2, 48),
  )
  def test_network_creation(self, input_size, filter_size_scale,
                            block_repeats, se_ratio, endpoints_num_filters):
    """Test creation of SpineNet models."""
    min_level = 3
    max_level = 7
    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size, input_size, 3])
    model = spinenet_mobile.SpineNetMobile(
        input_specs=input_specs,
        min_level=min_level,
        max_level=max_level,
        endpoints_num_filters=endpoints_num_filters,
        se_ratio=se_ratio,
        block_repeats=block_repeats,
        filter_size_scale=filter_size_scale,
        init_stochastic_depth_rate=0.2,
    )

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = model(inputs)

    for l in range(min_level, max_level + 1):
      self.assertIn(str(l), endpoints.keys())
      self.assertAllEqual(
          [1, input_size / 2 ** l, input_size / 2 ** l,
           endpoints_num_filters],
          endpoints[str(l)].shape.as_list())

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        min_level=3,
        max_level=7,
        endpoints_num_filters=256,
        se_ratio=0.2,
        expand_ratio=6,
        block_repeats=1,
        filter_size_scale=1.0,
        init_stochastic_depth_rate=0.2,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
        use_keras_upsampling_2d=False,
    )
    network = spinenet_mobile.SpineNetMobile(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = spinenet_mobile.SpineNetMobile.from_config(
        network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/backbones/spinenet_test.py  0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for SpineNet."""

# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import spinenet


class SpineNetTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (128, 0.65, 1, 0.5, 128, 4, 6),
      (256, 1.0, 1, 0.5, 256, 3, 6),
      (384, 1.0, 2, 0.5, 256, 4, 7),
      (512, 1.0, 3, 1.0, 256, 3, 7),
      (640, 1.3, 4, 1.0, 384, 3, 7),
  )
  def test_network_creation(self, input_size, filter_size_scale,
                            block_repeats, resample_alpha,
                            endpoints_num_filters, min_level, max_level):
    """Test creation of SpineNet models."""
    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size, input_size, 3])
    model = spinenet.SpineNet(
        input_specs=input_specs,
        min_level=min_level,
        max_level=max_level,
        endpoints_num_filters=endpoints_num_filters,
        resample_alpha=resample_alpha,
        block_repeats=block_repeats,
        filter_size_scale=filter_size_scale,
        init_stochastic_depth_rate=0.2,
    )

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = model(inputs)

    for l in range(min_level, max_level + 1):
      self.assertIn(str(l), endpoints.keys())
      self.assertAllEqual(
          [1, input_size / 2 ** l, input_size / 2 ** l,
           endpoints_num_filters],
          endpoints[str(l)].shape.as_list())

  @parameterized.parameters(
      ((128, 128), (128, 128)),
      ((128, 128), (256, 256)),
      ((640, 640), (896, 1664)),
  )
  def test_load_from_different_input_specs(self, input_size_1, input_size_2):
    """Test loading checkpoints with different input size."""

    def build_spinenet(input_size):
      tf.keras.backend.set_image_data_format('channels_last')
      input_specs = tf.keras.layers.InputSpec(
          shape=[None, input_size[0], input_size[1], 3])
      model = spinenet.SpineNet(
          input_specs=input_specs,
          min_level=3,
          max_level=7,
          endpoints_num_filters=384,
          resample_alpha=1.0,
          block_repeats=2,
          filter_size_scale=0.5)
      return model

    model_1 = build_spinenet(input_size_1)
    model_2 = build_spinenet(input_size_2)

    ckpt_1 = tf.train.Checkpoint(backbone=model_1)
    ckpt_2 = tf.train.Checkpoint(backbone=model_2)

    ckpt_path = self.get_temp_dir() + '/ckpt'
    ckpt_1.write(ckpt_path)
    ckpt_2.restore(ckpt_path).expect_partial()

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        min_level=3,
        max_level=7,
        endpoints_num_filters=256,
        resample_alpha=0.5,
        block_repeats=1,
        filter_size_scale=1.0,
        init_stochastic_depth_rate=0.2,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    network = spinenet.SpineNet(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = spinenet.SpineNet.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/classification_model.py  0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build classification models."""
from typing import Any, Mapping, Optional

# Import libraries
import tensorflow as tf

layers = tf.keras.layers


@tf.keras.utils.register_keras_serializable(package='Vision')
class ClassificationModel(tf.keras.Model):
  """A classification class builder."""

  def __init__(
      self,
      backbone: tf.keras.Model,
      num_classes: int,
      input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
          shape=[None, None, None, 3]),
      dropout_rate: float = 0.0,
      kernel_initializer: str = 'random_uniform',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      add_head_batch_norm: bool = False,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      skip_logits_layer: bool = False,
      **kwargs):
    """Classification initialization function.

    Args:
      backbone: a backbone network.
      num_classes: `int` number of classes in classification task.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      dropout_rate: `float` rate for dropout regularization.
      kernel_initializer: kernel initializer for the dense layer.
      kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      bias_regularizer: tf.keras.regularizers.Regularizer object. Default to
        None.
      add_head_batch_norm: `bool` whether to add a batch normalization layer
        before pool.
      use_sync_bn: `bool` if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing by
        zero.
      skip_logits_layer: `bool`, whether to skip the prediction layer.
      **kwargs: keyword arguments to be passed.
    """
    if use_sync_bn:
      norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm = tf.keras.layers.BatchNormalization
    axis = -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1

    inputs = tf.keras.Input(shape=input_specs.shape[1:], name=input_specs.name)
    endpoints = backbone(inputs)
    x = endpoints[max(endpoints.keys())]

    if add_head_batch_norm:
      x = norm(axis=axis, momentum=norm_momentum, epsilon=norm_epsilon)(x)

    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    if not skip_logits_layer:
      x = tf.keras.layers.Dropout(dropout_rate)(x)
      x = tf.keras.layers.Dense(
          num_classes,
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(x)

    super(ClassificationModel, self).__init__(
        inputs=inputs, outputs=x, **kwargs)
    self._config_dict = {
        'backbone': backbone,
        'num_classes': num_classes,
        'input_specs': input_specs,
        'dropout_rate': dropout_rate,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'add_head_batch_norm': add_head_batch_norm,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
    }
    self._input_specs = input_specs
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._backbone = backbone
    self._norm = norm

  @property
  def checkpoint_items(self) -> Mapping[str, tf.keras.Model]:
    """Returns a dictionary of items to be additionally checkpointed."""
    return dict(backbone=self.backbone)

  @property
  def backbone(self) -> tf.keras.Model:
    return self._backbone

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
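A minimal sketch (not part of this commit) pairing ClassificationModel with the SpineNetMobile backbone added earlier in this change. Any backbone that returns a {level: tensor} endpoint dict works the same way: the head picks the highest-level endpoint, pools it, and attaches dropout plus a dense logits layer.

import tensorflow as tf
from official.vision.modeling import classification_model
from official.vision.modeling.backbones import spinenet_mobile

input_specs = tf.keras.layers.InputSpec(shape=[None, 224, 224, 3])
backbone = spinenet_mobile.SpineNetMobile(input_specs=input_specs)
model = classification_model.ClassificationModel(
    backbone=backbone,
    num_classes=1000,
    input_specs=input_specs,
    dropout_rate=0.2)

# The 224x224 input and 1000 classes are example values.
logits = model(tf.zeros([2, 224, 224, 3]))  # shape [2, 1000]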
official/vision/modeling/classification_model_test.py  0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for classification network."""

# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.vision.modeling import backbones
from official.vision.modeling import classification_model


class ClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (128, 50, 'relu'),
      (128, 50, 'relu'),
      (128, 50, 'swish'),
  )
  def test_resnet_network_creation(self, input_size, resnet_model_id,
                                   activation):
    """Test for creation of a ResNet-50 classifier."""
    inputs = np.random.rand(2, input_size, input_size, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.ResNet(
        model_id=resnet_model_id, activation=activation)
    self.assertEqual(backbone.count_params(), 23561152)

    num_classes = 1000
    model = classification_model.ClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        dropout_rate=0.2,
    )
    self.assertEqual(model.count_params(), 25610152)

    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  def test_revnet_network_creation(self):
    """Test for creation of a RevNet-56 classifier."""
    revnet_model_id = 56
    inputs = np.random.rand(2, 224, 224, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.RevNet(model_id=revnet_model_id)
    self.assertEqual(backbone.count_params(), 19473792)

    num_classes = 1000
    model = classification_model.ClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        dropout_rate=0.2,
        add_head_batch_norm=True,
    )
    self.assertEqual(model.count_params(), 22816104)

    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  @combinations.generate(
      combinations.combine(
          mobilenet_model_id=[
              'MobileNetV1',
              'MobileNetV2',
              'MobileNetV3Large',
              'MobileNetV3Small',
              'MobileNetV3EdgeTPU',
              'MobileNetMultiAVG',
              'MobileNetMultiMAX',
          ],
          filter_size_scale=[1.0, 0.75],
      ))
  def test_mobilenet_network_creation(self, mobilenet_model_id,
                                      filter_size_scale):
    """Test for creation of a MobileNet classifier."""
    inputs = np.random.rand(2, 224, 224, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.MobileNet(
        model_id=mobilenet_model_id, filter_size_scale=filter_size_scale)

    num_classes = 1001
    model = classification_model.ClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        dropout_rate=0.2,
    )

    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  @combinations.generate(
      combinations.combine(
          strategy=[
              strategy_combinations.cloud_tpu_strategy,
              strategy_combinations.one_device_strategy_gpu,
          ],
          use_sync_bn=[False, True],
      ))
  def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
    """Test for sync bn on TPU and GPU devices."""
    inputs = np.random.rand(64, 128, 128, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    with strategy.scope():
      backbone = backbones.ResNet(model_id=50, use_sync_bn=use_sync_bn)
      model = classification_model.ClassificationModel(
          backbone=backbone,
          num_classes=1000,
          dropout_rate=0.2,
      )
      _ = model(inputs)

  @combinations.generate(
      combinations.combine(
          strategy=[
              strategy_combinations.one_device_strategy_gpu,
          ],
          data_format=['channels_last', 'channels_first'],
          input_dim=[1, 3, 4]))
  def test_data_format_gpu(self, strategy, data_format, input_dim):
    """Test for different data formats on GPU devices."""
    if data_format == 'channels_last':
      inputs = np.random.rand(2, 128, 128, input_dim)
    else:
      inputs = np.random.rand(2, input_dim, 128, 128)
    input_specs = tf.keras.layers.InputSpec(shape=inputs.shape)

    tf.keras.backend.set_image_data_format(data_format)

    with strategy.scope():
      backbone = backbones.ResNet(model_id=50, input_specs=input_specs)
      model = classification_model.ClassificationModel(
          backbone=backbone,
          num_classes=1000,
          input_specs=input_specs,
      )
      _ = model(inputs)

  def test_serialize_deserialize(self):
    """Validate the classification net can be serialized and deserialized."""
    tf.keras.backend.set_image_data_format('channels_last')
    backbone = backbones.ResNet(model_id=50)

    model = classification_model.ClassificationModel(
        backbone=backbone, num_classes=1000)

    config = model.get_config()
    new_model = classification_model.ClassificationModel.from_config(config)

    # Validate that the config can be forced to JSON.
    _ = new_model.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(model.get_config(), new_model.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/decoders/__init__.py  0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Decoders package definition."""
from official.vision.modeling.decoders.aspp import ASPP
from official.vision.modeling.decoders.fpn import FPN
from official.vision.modeling.decoders.nasfpn import NASFPN
official/vision/modeling/decoders/aspp.py  0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of Atrous Spatial Pyramid Pooling (ASPP) decoder."""
from typing import Any, List, Mapping, Optional, Union

# Import libraries
import tensorflow as tf

from official.modeling import hyperparams
from official.vision.modeling.decoders import factory
from official.vision.modeling.layers import deeplab
from official.vision.modeling.layers import nn_layers

TensorMapUnion = Union[tf.Tensor, Mapping[str, tf.Tensor]]


@tf.keras.utils.register_keras_serializable(package='Vision')
class ASPP(tf.keras.layers.Layer):
  """Creates an Atrous Spatial Pyramid Pooling (ASPP) layer."""

  def __init__(
      self,
      level: int,
      dilation_rates: List[int],
      num_filters: int = 256,
      pool_kernel_size: Optional[int] = None,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      activation: str = 'relu',
      dropout_rate: float = 0.0,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      interpolation: str = 'bilinear',
      use_depthwise_convolution: bool = False,
      spp_layer_version: str = 'v1',
      output_tensor: bool = False,
      **kwargs):
    """Initializes an Atrous Spatial Pyramid Pooling (ASPP) layer.

    Args:
      level: An `int` level to apply ASPP.
      dilation_rates: A `list` of dilation rates.
      num_filters: An `int` number of output filters in ASPP.
      pool_kernel_size: A `list` of [height, width] of pooling kernel size or
        None. Pooling size is with respect to original image size; it will be
        scaled down by 2**level. If None, global average pooling is used.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      activation: A `str` activation to be used in ASPP.
      dropout_rate: A `float` rate for dropout regularization.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      interpolation: A `str` of interpolation method. It should be one of
        `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
        `gaussian`, or `mitchellcubic`.
      use_depthwise_convolution: If True, depthwise separable convolutions
        will be added to the Atrous spatial pyramid pooling.
      spp_layer_version: A `str` of spatial pyramid pooling layer version.
      output_tensor: Whether to output a single tensor or a dictionary of
        tensors. Default is False.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    self._config_dict = {
        'level': level,
        'dilation_rates': dilation_rates,
        'num_filters': num_filters,
        'pool_kernel_size': pool_kernel_size,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'activation': activation,
        'dropout_rate': dropout_rate,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'interpolation': interpolation,
        'use_depthwise_convolution': use_depthwise_convolution,
        'spp_layer_version': spp_layer_version,
        'output_tensor': output_tensor
    }
    self._aspp_layer = deeplab.SpatialPyramidPooling if self._config_dict[
        'spp_layer_version'] == 'v1' else nn_layers.SpatialPyramidPooling

  def build(self, input_shape):
    pool_kernel_size = None
    if self._config_dict['pool_kernel_size']:
      pool_kernel_size = [
          int(p_size // 2 ** self._config_dict['level'])
          for p_size in self._config_dict['pool_kernel_size']
      ]

    self.aspp = self._aspp_layer(
        output_channels=self._config_dict['num_filters'],
        dilation_rates=self._config_dict['dilation_rates'],
        pool_kernel_size=pool_kernel_size,
        use_sync_bn=self._config_dict['use_sync_bn'],
        batchnorm_momentum=self._config_dict['norm_momentum'],
        batchnorm_epsilon=self._config_dict['norm_epsilon'],
        activation=self._config_dict['activation'],
        dropout=self._config_dict['dropout_rate'],
        kernel_initializer=self._config_dict['kernel_initializer'],
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        interpolation=self._config_dict['interpolation'],
        use_depthwise_convolution=self._config_dict[
            'use_depthwise_convolution'])

  def call(self, inputs: TensorMapUnion) -> TensorMapUnion:
    """Calls the Atrous Spatial Pyramid Pooling (ASPP) layer on an input.

    If output_tensor is False, the output of ASPP will be a dict of
    {`level`: `tf.Tensor`} even if only one level is present. Hence, this
    will be compatible with the rest of the segmentation model interfaces.
    If output_tensor is True, a single tensor is output.

    Args:
      inputs: A `tf.Tensor` of shape [batch, height_l, width_l, filter_size]
        or a `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel feature maps.
        - values: A `tf.Tensor` of shape [batch, height_l, width_l,
          filter_size].

    Returns:
      A `tf.Tensor` of shape [batch, height_l, width_l, filter_size] or a
      `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel feature maps.
        - values: A `tf.Tensor` of output of ASPP module.
    """
    outputs = {}
    level = str(self._config_dict['level'])
    backbone_output = inputs[level] if isinstance(inputs, dict) else inputs
    outputs = self.aspp(backbone_output)
    return outputs if self._config_dict['output_tensor'] else {level: outputs}

  def get_config(self) -> Mapping[str, Any]:
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(self._config_dict.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)


@factory.register_decoder_builder('aspp')
def build_aspp_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds ASPP decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone. Note this is for consistent
      interface, and is not used by ASPP decoder.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default
      to None.

  Returns:
    A `tf.keras.Model` instance of the ASPP decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `aspp`.
  """
  del input_specs  # input_specs is not used by ASPP decoder.
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'aspp':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `aspp`.')

  norm_activation_config = model_config.norm_activation
  return ASPP(
      level=decoder_cfg.level,
      dilation_rates=decoder_cfg.dilation_rates,
      num_filters=decoder_cfg.num_filters,
      use_depthwise_convolution=decoder_cfg.use_depthwise_convolution,
      pool_kernel_size=decoder_cfg.pool_kernel_size,
      dropout_rate=decoder_cfg.dropout_rate,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      activation=norm_activation_config.activation,
      kernel_regularizer=l2_regularizer,
      spp_layer_version=decoder_cfg.spp_layer_version,
      output_tensor=decoder_cfg.output_tensor)
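A minimal sketch (not part of this commit) of the dict-in/dict-out contract that call() describes: ASPP reads the feature map at `level` and, with the default output_tensor=False, returns a single-entry dict keyed by that level. The shapes below are arbitrary example values.

import tensorflow as tf
from official.vision.modeling.decoders import aspp

decoder = aspp.ASPP(level=3, dilation_rates=[6, 12, 18], num_filters=256)
features = {
    '3': tf.zeros([1, 32, 32, 2048]),
    '4': tf.zeros([1, 16, 16, 2048]),
}
# Only the level-3 entry is consumed; spatial size is preserved and channels
# are projected to num_filters.
outputs = decoder(features)  # {'3': tensor of shape [1, 32, 32, 256]}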
official/vision/modeling/decoders/aspp_test.py  0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for aspp."""

# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import aspp


class ASPPTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (3, [6, 12, 18, 24], 128, 'v1'),
      (3, [6, 12, 18], 128, 'v1'),
      (3, [6, 12], 256, 'v1'),
      (4, [6, 12, 18, 24], 128, 'v2'),
      (4, [6, 12, 18], 128, 'v2'),
      (4, [6, 12], 256, 'v2'),
  )
  def test_network_creation(self, level, dilation_rates, num_filters,
                            spp_layer_version):
    """Test creation of ASPP."""
    input_size = 256
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    backbone = resnet.ResNet(model_id=50)
    network = aspp.ASPP(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        spp_layer_version=spp_layer_version)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    self.assertIn(str(level), feats)
    self.assertAllEqual(
        [1, input_size // 2 ** level, input_size // 2 ** level, num_filters],
        feats[str(level)].shape.as_list())

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        level=3,
        dilation_rates=[6, 12],
        num_filters=256,
        pool_kernel_size=None,
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        activation='relu',
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        interpolation='bilinear',
        dropout_rate=0.2,
        use_depthwise_convolution='false',
        spp_layer_version='v1',
        output_tensor=False,
        dtype='float32',
        name='aspp',
        trainable=True)
    network = aspp.ASPP(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = aspp.ASPP.from_config(network.get_config())

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/decoders/factory.py  0 → 100644

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Decoder registers and factory method.

One can register a new decoder model by the following two steps:

1. Import the factory and register the build in the decoder file.
2. Import the decoder class and add a build in __init__.py.

```
# my_decoder.py

from modeling.decoders import factory

class MyDecoder():
  ...

@factory.register_decoder_builder('my_decoder')
def build_my_decoder():
  return MyDecoder()

# decoders/__init__.py adds import
from modeling.decoders.my_decoder import MyDecoder
```

If you want the MyDecoder class to be used only by a specific binary, don't
import the decoder module in decoders/__init__.py; instead, import it in the
place that uses it.
"""
from typing import Any, Callable, Mapping, Optional, Union

# Import libraries
import tensorflow as tf

from official.core import registry
from official.modeling import hyperparams

_REGISTERED_DECODER_CLS = {}


def register_decoder_builder(key: str) -> Callable[..., Any]:
  """Decorates a builder of decoder class.

  The builder should be a Callable (a class or a function).
  This decorator supports registration of decoder builder as follows:

  ```
  class MyDecoder(tf.keras.Model):
    pass

  @register_decoder_builder('mydecoder')
  def builder(input_specs, config, l2_reg):
    return MyDecoder(...)

  # Builds a MyDecoder object.
  my_decoder = build_decoder_3d(input_specs, config, l2_reg)
  ```

  Args:
    key: A `str` of key to look up the builder.

  Returns:
    A callable for using as class decorator that registers the decorated class
    for creation from an instance of task_config_cls.
  """
  return registry.register(_REGISTERED_DECODER_CLS, key)


@register_decoder_builder('identity')
def build_identity(
    input_specs: Optional[Mapping[str, tf.TensorShape]] = None,
    model_config: Optional[hyperparams.Config] = None,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> None:
  """Builds identity decoder from a config.

  All the input arguments are not used by identity decoder but kept here to
  ensure the interface is consistent.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A `OneOfConfig` of model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
      None.

  Returns:
    An instance of the identity decoder.
  """
  del input_specs, model_config, l2_regularizer  # Unused by identity decoder.


def build_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None,
    **kwargs
) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds decoder from a config.

  A decoder can be a keras.Model, a keras.layers.Layer, or None. If it is not
  None, the decoder will take features from the backbone as input and
  generate decoded feature maps. If it is None, such as an identity decoder,
  the decoder is skipped and features from the backbone are regarded as model
  output.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A `OneOfConfig` of model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
      None.
    **kwargs: Additional keyword args to be passed to decoder builder.

  Returns:
    An instance of the decoder.
  """
  decoder_builder = registry.lookup(_REGISTERED_DECODER_CLS,
                                    model_config.decoder.type)

  return decoder_builder(
      input_specs=input_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer,
      **kwargs)
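A minimal, self-contained sketch (not part of this commit) of the two-step registration flow the module docstring describes; MyDecoder and build_my_decoder are hypothetical names used only for illustration.

import tensorflow as tf
from official.vision.modeling.decoders import factory


class MyDecoder(tf.keras.layers.Layer):
  """A toy pass-through decoder."""

  def call(self, inputs):
    return inputs


@factory.register_decoder_builder('my_decoder')
def build_my_decoder(input_specs, model_config, l2_regularizer=None):
  del input_specs, model_config, l2_regularizer  # Unused by this toy decoder.
  return MyDecoder()

# factory.build_decoder() then dispatches on model_config.decoder.type, so a
# config with decoder.type == 'my_decoder' resolves to build_my_decoder.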
official/vision/modeling/decoders/factory_test.py
0 → 100644
View file @
8a9a607c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for decoder factory functions."""
from absl.testing import parameterized
import tensorflow as tf

from tensorflow.python.distribute import combinations
from official.vision import configs
from official.vision.configs import decoders as decoders_cfg
from official.vision.modeling import decoders
from official.vision.modeling.decoders import factory


class FactoryTest(tf.test.TestCase, parameterized.TestCase):

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          use_separable_conv=[True, False]))
  def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
    """Test creation of FPN decoder."""
    min_level = 3
    max_level = 7
    input_specs = {}
    for level in range(min_level, max_level):
      input_specs[str(level)] = tf.TensorShape(
          [1, 128 // (2**level), 128 // (2**level), 3])

    network = decoders.FPN(
        input_specs=input_specs,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='fpn',
        fpn=decoders_cfg.FPN(
            num_filters=num_filters, use_separable_conv=use_separable_conv))

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()

    self.assertEqual(network_config, factory_network_config)

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          num_repeats=[3, 5],
          use_separable_conv=[True, False]))
  def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
                                   use_separable_conv):
    """Test creation of NASFPN decoder."""
    min_level = 3
    max_level = 7
    input_specs = {}
    for level in range(min_level, max_level):
      input_specs[str(level)] = tf.TensorShape(
          [1, 128 // (2**level), 128 // (2**level), 3])

    network = decoders.NASFPN(
        input_specs=input_specs,
        num_filters=num_filters,
        num_repeats=num_repeats,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='nasfpn',
        nasfpn=decoders_cfg.NASFPN(
            num_filters=num_filters,
            num_repeats=num_repeats,
            use_separable_conv=use_separable_conv))

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()

    self.assertEqual(network_config, factory_network_config)

  @combinations.generate(
      combinations.combine(
          level=[3, 4],
          dilation_rates=[[6, 12, 18], [6, 12]],
          num_filters=[128, 256]))
  def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
    """Test creation of ASPP decoder."""
    input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}

    network = decoders.ASPP(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        use_sync_bn=True)

    model_config = configs.semantic_segmentation.SemanticSegmentationModel()
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='aspp',
        aspp=decoders_cfg.ASPP(
            level=level,
            dilation_rates=dilation_rates,
            num_filters=num_filters))

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()

    # Due to calling `super().get_config()` in the ASPP layer, everything but
    # the names of the two layer instances is identical, so we force the names
    # to be equal to avoid a false alarm.
    factory_network_config['name'] = network_config['name']

    self.assertEqual(network_config, factory_network_config)

  def test_identity_decoder_creation(self):
    """Test creation of identity decoder."""
    model_config = configs.retinanet.RetinaNet()
    model_config.num_classes = 2
    model_config.input_size = [None, None, 3]

    model_config.decoder = decoders_cfg.Decoder(
        type='identity', identity=decoders_cfg.Identity())

    factory_network = factory.build_decoder(
        input_specs=None, model_config=model_config)

    self.assertIsNone(factory_network)


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/decoders/fpn.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of Feature Pyramid Networks (FPN)."""
from typing import Any, Mapping, Optional

# Import libraries
from absl import logging
import tensorflow as tf

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.decoders import factory
from official.vision.ops import spatial_transform_ops


@tf.keras.utils.register_keras_serializable(package='Vision')
class FPN(tf.keras.Model):
  """Creates a Feature Pyramid Network (FPN).

  This implements the paper:
  Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan,
  and Serge Belongie.
  Feature Pyramid Networks for Object Detection.
  (https://arxiv.org/pdf/1612.03144)
  """

  def __init__(
      self,
      input_specs: Mapping[str, tf.TensorShape],
      min_level: int = 3,
      max_level: int = 7,
      num_filters: int = 256,
      fusion_type: str = 'sum',
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a Feature Pyramid Network (FPN).

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      num_filters: An `int` number of filters in FPN layers.
      fusion_type: A `str` of `sum` or `concat`. Whether performing sum or
        concat for feature fusion.
      use_separable_conv: A `bool`. If True use separable convolution for
        convolution in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._config_dict = {
        'input_specs': input_specs,
        'min_level': min_level,
        'max_level': max_level,
        'num_filters': num_filters,
        'fusion_type': fusion_type,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    if use_separable_conv:
      conv2d = tf.keras.layers.SeparableConv2D
    else:
      conv2d = tf.keras.layers.Conv2D
    if use_sync_bn:
      norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm = tf.keras.layers.BatchNormalization
    activation_fn = tf.keras.layers.Activation(
        tf_utils.get_activation(activation))

    # Build input feature pyramid.
    if tf.keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Get input feature pyramid from backbone.
    logging.info('FPN input_specs: %s', input_specs)
    inputs = self._build_input_pyramid(input_specs, min_level)
    backbone_max_level = min(int(max(inputs.keys())), max_level)

    # Build lateral connections.
    feats_lateral = {}
    for level in range(min_level, backbone_max_level + 1):
      feats_lateral[str(level)] = conv2d(
          filters=num_filters,
          kernel_size=1,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(inputs[str(level)])

    # Build top-down path.
    feats = {str(backbone_max_level): feats_lateral[str(backbone_max_level)]}
    for level in range(backbone_max_level - 1, min_level - 1, -1):
      feat_a = spatial_transform_ops.nearest_upsampling(
          feats[str(level + 1)], 2)
      feat_b = feats_lateral[str(level)]
      if fusion_type == 'sum':
        feats[str(level)] = feat_a + feat_b
      elif fusion_type == 'concat':
        feats[str(level)] = tf.concat([feat_a, feat_b], axis=-1)
      else:
        raise ValueError('Fusion type {} not supported.'.format(fusion_type))

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build post-hoc 3x3 convolution kernel.
    for level in range(min_level, backbone_max_level + 1):
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=1,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(feats[str(level)])

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build coarser FPN levels introduced for RetinaNet.
    for level in range(backbone_max_level + 1, max_level + 1):
      feats_in = feats[str(level - 1)]
      if level > backbone_max_level + 1:
        feats_in = activation_fn(feats_in)
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=2,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(feats_in)

    # Apply batch norm layers.
    for level in range(min_level, max_level + 1):
      feats[str(level)] = norm(
          axis=bn_axis,
          momentum=norm_momentum,
          epsilon=norm_epsilon)(feats[str(level)])

    self._output_specs = {
        str(level): feats[str(level)].get_shape()
        for level in range(min_level, max_level + 1)
    }

    super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
    assert isinstance(input_specs, dict)
    if min(input_specs.keys()) > str(min_level):
      raise ValueError(
          'Backbone min level should be less or equal to FPN min level')

    inputs = {}
    for level, spec in input_specs.items():
      inputs[level] = tf.keras.Input(shape=spec[1:])
    return inputs

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs


@factory.register_decoder_builder('fpn')
def build_fpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[
        tf.keras.regularizers.Regularizer] = None) -> tf.keras.Model:
  """Builds FPN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the FPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `fpn`.
  """
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'fpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `fpn`.')
  norm_activation_config = model_config.norm_activation
  return FPN(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=decoder_cfg.num_filters,
      fusion_type=decoder_cfg.fusion_type,
      use_separable_conv=decoder_cfg.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
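
As a quick illustration of the class above, a minimal sketch that wires an FPN onto a ResNet backbone (assuming tf-models-official; it mirrors the FPN tests that follow, and the 256x256 input size is an arbitrary example):

import tensorflow as tf

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn

backbone = resnet.ResNet(model_id=50)
network = fpn.FPN(input_specs=backbone.output_specs, min_level=3, max_level=7)

# Multi-scale features keyed by level: {'3': ..., '4': ..., ..., '7': ...}.
images = tf.keras.Input(shape=(256, 256, 3), batch_size=1)
feats = network(backbone(images))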
official/vision/modeling/decoders/fpn_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import mobilenet
from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn


class FPNTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (256, 3, 7, False, 'sum'),
      (256, 3, 7, True, 'concat'),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv, fusion_type):
    """Test creation of FPN."""
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    backbone = resnet.ResNet(model_id=50)
    network = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        fusion_type=fusion_type,
        use_separable_conv=use_separable_conv)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      self.assertAllEqual(
          [1, input_size // 2**level, input_size // 2**level, 256],
          feats[str(level)].shape.as_list())

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation_with_mobilenet(self, input_size, min_level,
                                           max_level, use_separable_conv):
    """Test creation of FPN with mobilenet backbone."""
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    backbone = mobilenet.MobileNet(model_id='MobileNetV2')
    network = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        use_separable_conv=use_separable_conv)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      self.assertAllEqual(
          [1, input_size // 2**level, input_size // 2**level, 256],
          feats[str(level)].shape.as_list())

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        input_specs=resnet.ResNet(model_id=50).output_specs,
        min_level=3,
        max_level=7,
        num_filters=256,
        fusion_type='sum',
        use_separable_conv=False,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    network = fpn.FPN(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = fpn.FPN.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the
    # old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/decoders/nasfpn.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of NAS-FPN."""
from typing import Any, List, Mapping, Optional, Tuple

# Import libraries
from absl import logging
import tensorflow as tf

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.decoders import factory
from official.vision.ops import spatial_transform_ops

# The fixed NAS-FPN architecture discovered by NAS.
# Each element represents a specification of a building block:
#   (block_level, combine_fn, (input_offset0, input_offset1), is_output).
NASFPN_BLOCK_SPECS = [
    (4, 'attention', (1, 3), False),
    (4, 'sum', (1, 5), False),
    (3, 'sum', (0, 6), True),
    (4, 'sum', (6, 7), True),
    (5, 'attention', (7, 8), True),
    (7, 'attention', (6, 9), True),
    (6, 'attention', (9, 10), True),
]


class BlockSpec():
  """A container class that specifies the block configuration for NAS-FPN."""

  def __init__(self, level: int, combine_fn: str,
               input_offsets: Tuple[int, int], is_output: bool):
    self.level = level
    self.combine_fn = combine_fn
    self.input_offsets = input_offsets
    self.is_output = is_output


def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Builds the list of BlockSpec objects for NAS-FPN."""
  if not block_specs:
    block_specs = NASFPN_BLOCK_SPECS
  logging.info('Building NAS-FPN block specs: %s', block_specs)
  return [BlockSpec(*b) for b in block_specs]


@tf.keras.utils.register_keras_serializable(package='Vision')
class NASFPN(tf.keras.Model):
  """Creates a NAS-FPN model.

  This implements the paper:
  Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le.
  NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object
  Detection.
  (https://arxiv.org/abs/1904.07392)
  """

  def __init__(
      self,
      input_specs: Mapping[str, tf.TensorShape],
      min_level: int = 3,
      max_level: int = 7,
      block_specs: List[BlockSpec] = build_block_specs(),
      num_filters: int = 256,
      num_repeats: int = 5,
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a NAS-FPN model.

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      block_specs: a list of BlockSpec objects that specifies the NAS-FPN
        network topology. By default, the previously discovered architecture
        is used.
      num_filters: An `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network.
      use_separable_conv: A `bool`. If True use separable convolution for
        convolution in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._config_dict = {
        'input_specs': input_specs,
        'min_level': min_level,
        'max_level': max_level,
        'num_filters': num_filters,
        'num_repeats': num_repeats,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._num_repeats = num_repeats

    self._conv_op = (tf.keras.layers.SeparableConv2D
                     if self._config_dict['use_separable_conv']
                     else tf.keras.layers.Conv2D)
    if self._config_dict['use_separable_conv']:
      self._conv_kwargs = {
          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      }
    else:
      self._conv_kwargs = {
          'kernel_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      }
    self._norm_op = (tf.keras.layers.experimental.SyncBatchNormalization
                     if self._config_dict['use_sync_bn']
                     else tf.keras.layers.BatchNormalization)
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._norm_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }
    self._activation = tf_utils.get_activation(activation)

    # Gets input feature pyramid from backbone.
    inputs = self._build_input_pyramid(input_specs, min_level)

    # Projects the input features.
    feats = []
    for level in range(self._min_level, self._max_level + 1):
      if str(level) in inputs.keys():
        feats.append(self._resample_feature_map(
            inputs[str(level)], level, level,
            self._config_dict['num_filters']))
      else:
        feats.append(self._resample_feature_map(
            feats[-1], level - 1, level, self._config_dict['num_filters']))

    # Repeatedly builds the NAS-FPN modules.
    for _ in range(self._num_repeats):
      output_feats = self._build_feature_pyramid(feats)
      feats = [output_feats[level]
               for level in range(self._min_level, self._max_level + 1)]

    self._output_specs = {
        str(level): output_feats[level].get_shape()
        for level in range(min_level, max_level + 1)
    }
    output_feats = {str(level): output_feats[level]
                    for level in output_feats.keys()}
    super(NASFPN, self).__init__(inputs=inputs, outputs=output_feats,
                                 **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
    assert isinstance(input_specs, dict)
    if min(input_specs.keys()) > str(min_level):
      raise ValueError(
          'Backbone min level should be less or equal to FPN min level')

    inputs = {}
    for level, spec in input_specs.items():
      inputs[level] = tf.keras.Input(shape=spec[1:])
    return inputs

  def _resample_feature_map(self, inputs, input_level, target_level,
                            target_num_filters=256):
    x = inputs
    _, _, _, input_num_filters = x.get_shape().as_list()
    if input_num_filters != target_num_filters:
      x = self._conv_op(
          filters=target_num_filters,
          kernel_size=1,
          padding='same',
          **self._conv_kwargs)(x)
      x = self._norm_op(**self._norm_kwargs)(x)

    if input_level < target_level:
      stride = int(2 ** (target_level - input_level))
      return tf.keras.layers.MaxPool2D(
          pool_size=stride, strides=stride, padding='same')(x)
    if input_level > target_level:
      scale = int(2 ** (input_level - target_level))
      return spatial_transform_ops.nearest_upsampling(x, scale=scale)

    # Force output x to be the same dtype as mixed precision policy. This
    # avoids dtype mismatch when one input (by default float32 dtype) does not
    # meet all the above conditions and is output unchanged, while other
    # inputs are processed to have different dtype, e.g., using bfloat16 on
    # TPU.
    compute_dtype = tf.keras.layers.Layer().dtype_policy.compute_dtype
    if (compute_dtype is not None) and (x.dtype != compute_dtype):
      return tf.cast(x, dtype=compute_dtype)
    else:
      return x

  def _global_attention(self, feat0, feat1):
    m = tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True)
    m = tf.math.sigmoid(m)
    return feat0 + feat1 * m

  def _build_feature_pyramid(self, feats):
    num_output_connections = [0] * len(feats)
    num_output_levels = self._max_level - self._min_level + 1
    feat_levels = list(range(self._min_level, self._max_level + 1))

    for i, block_spec in enumerate(self._block_specs):
      new_level = block_spec.level

      # Checks the range of input_offsets.
      for input_offset in block_spec.input_offsets:
        if input_offset >= len(feats):
          raise ValueError(
              'input_offset ({}) is larger than num feats({})'.format(
                  input_offset, len(feats)))
      input0 = block_spec.input_offsets[0]
      input1 = block_spec.input_offsets[1]

      # Update graph with inputs.
      node0 = feats[input0]
      node0_level = feat_levels[input0]
      num_output_connections[input0] += 1
      node0 = self._resample_feature_map(node0, node0_level, new_level)
      node1 = feats[input1]
      node1_level = feat_levels[input1]
      num_output_connections[input1] += 1
      node1 = self._resample_feature_map(node1, node1_level, new_level)

      # Combine node0 and node1 to create new feat.
      if block_spec.combine_fn == 'sum':
        new_node = node0 + node1
      elif block_spec.combine_fn == 'attention':
        if node0_level >= node1_level:
          new_node = self._global_attention(node0, node1)
        else:
          new_node = self._global_attention(node1, node0)
      else:
        raise ValueError('unknown combine_fn `{}`.'.format(
            block_spec.combine_fn))

      # Add intermediate nodes that do not have any connections to output.
      if block_spec.is_output:
        for j, (feat, feat_level, num_output) in enumerate(
            zip(feats, feat_levels, num_output_connections)):
          if num_output == 0 and feat_level == new_level:
            num_output_connections[j] += 1
            feat_ = self._resample_feature_map(feat, feat_level, new_level)
            new_node += feat_

      new_node = self._activation(new_node)
      new_node = self._conv_op(
          filters=self._config_dict['num_filters'],
          kernel_size=(3, 3),
          padding='same',
          **self._conv_kwargs)(new_node)
      new_node = self._norm_op(**self._norm_kwargs)(new_node)
      feats.append(new_node)
      feat_levels.append(new_level)
      num_output_connections.append(0)

    output_feats = {}
    for i in range(len(feats) - num_output_levels, len(feats)):
      level = feat_levels[i]
      output_feats[level] = feats[i]
    logging.info('Output feature pyramid: %s', output_feats)
    return output_feats

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs


@factory.register_decoder_builder('nasfpn')
def build_nasfpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[
        tf.keras.regularizers.Regularizer] = None) -> tf.keras.Model:
  """Builds NASFPN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the NASFPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `nasfpn`.
  """
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'nasfpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `nasfpn`.')
  norm_activation_config = model_config.norm_activation
  return NASFPN(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=decoder_cfg.num_filters,
      num_repeats=decoder_cfg.num_repeats,
      use_separable_conv=decoder_cfg.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
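
The searched topology in `NASFPN_BLOCK_SPECS` can be overridden through `build_block_specs`. A minimal sketch (assuming tf-models-official; the alternative topology below is a made-up example in the same `(level, combine_fn, input_offsets, is_output)` form, not a tuned architecture):

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import nasfpn

# Hypothetical sum-only topology; offsets index the running list of nodes,
# which starts with the five input levels 3..7 at offsets 0..4.
custom_specs = nasfpn.build_block_specs([
    (4, 'sum', (1, 3), False),  # becomes offset 5
    (3, 'sum', (0, 5), True),   # offset 6, output level 3
    (4, 'sum', (5, 6), True),   # offset 7, output level 4
    (5, 'sum', (6, 7), True),   # offset 8, output level 5
    (6, 'sum', (7, 8), True),   # offset 9, output level 6
    (7, 'sum', (8, 9), True),   # offset 10, output level 7
])

backbone = resnet.ResNet(model_id=50)
network = nasfpn.NASFPN(
    input_specs=backbone.output_specs,
    block_specs=custom_specs,
    min_level=3,
    max_level=7)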
official/vision/modeling/decoders/nasfpn_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for NAS-FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import nasfpn


class NASFPNTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv):
    """Test creation of NAS-FPN."""
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    num_filters = 256
    backbone = resnet.ResNet(model_id=50)
    network = nasfpn.NASFPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      self.assertAllEqual(
          [1, input_size // 2**level, input_size // 2**level, num_filters],
          feats[str(level)].shape.as_list())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/factory.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
from typing import Optional

import tensorflow as tf

from official.vision.configs import image_classification as classification_cfg
from official.vision.configs import maskrcnn as maskrcnn_cfg
from official.vision.configs import retinanet as retinanet_cfg
from official.vision.configs import semantic_segmentation as segmentation_cfg
from official.vision.modeling import backbones
from official.vision.modeling import classification_model
from official.vision.modeling import decoders
from official.vision.modeling import maskrcnn_model
from official.vision.modeling import retinanet_model
from official.vision.modeling import segmentation_model
from official.vision.modeling.heads import dense_prediction_heads
from official.vision.modeling.heads import instance_heads
from official.vision.modeling.heads import segmentation_heads
from official.vision.modeling.layers import detection_generator
from official.vision.modeling.layers import mask_sampler
from official.vision.modeling.layers import roi_aligner
from official.vision.modeling.layers import roi_generator
from official.vision.modeling.layers import roi_sampler


def build_classification_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: classification_cfg.ImageClassificationModel,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    skip_logits_layer: bool = False,
    backbone: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Builds the classification model."""
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)

  model = classification_model.ClassificationModel(
      backbone=backbone,
      num_classes=model_config.num_classes,
      input_specs=input_specs,
      dropout_rate=model_config.dropout_rate,
      kernel_initializer=model_config.kernel_initializer,
      kernel_regularizer=l2_regularizer,
      add_head_batch_norm=model_config.add_head_batch_norm,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      skip_logits_layer=skip_logits_layer)
  return model


def build_maskrcnn(
    input_specs: tf.keras.layers.InputSpec,
    model_config: maskrcnn_cfg.MaskRCNN,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    backbone: Optional[tf.keras.Model] = None,
    decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Builds Mask R-CNN model."""
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)
  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  rpn_head_config = model_config.rpn_head
  roi_generator_config = model_config.roi_generator
  roi_sampler_config = model_config.roi_sampler
  roi_aligner_config = model_config.roi_aligner
  detection_head_config = model_config.detection_head
  generator_config = model_config.detection_generator
  num_anchors_per_location = (
      len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)

  rpn_head = dense_prediction_heads.RPNHead(
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=rpn_head_config.num_convs,
      num_filters=rpn_head_config.num_filters,
      use_separable_conv=rpn_head_config.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  detection_head = instance_heads.DetectionHead(
      num_classes=model_config.num_classes,
      num_convs=detection_head_config.num_convs,
      num_filters=detection_head_config.num_filters,
      use_separable_conv=detection_head_config.use_separable_conv,
      num_fcs=detection_head_config.num_fcs,
      fc_dims=detection_head_config.fc_dims,
      class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer,
      name='detection_head')

  if decoder:
    decoder_features = decoder(backbone_features)
    rpn_head(decoder_features)

  if roi_sampler_config.cascade_iou_thresholds:
    detection_head_cascade = [detection_head]
    for cascade_num in range(len(roi_sampler_config.cascade_iou_thresholds)):
      detection_head = instance_heads.DetectionHead(
          num_classes=model_config.num_classes,
          num_convs=detection_head_config.num_convs,
          num_filters=detection_head_config.num_filters,
          use_separable_conv=detection_head_config.use_separable_conv,
          num_fcs=detection_head_config.num_fcs,
          fc_dims=detection_head_config.fc_dims,
          class_agnostic_bbox_pred=(
              detection_head_config.class_agnostic_bbox_pred),
          activation=norm_activation_config.activation,
          use_sync_bn=norm_activation_config.use_sync_bn,
          norm_momentum=norm_activation_config.norm_momentum,
          norm_epsilon=norm_activation_config.norm_epsilon,
          kernel_regularizer=l2_regularizer,
          name='detection_head_{}'.format(cascade_num + 1))
      detection_head_cascade.append(detection_head)
    detection_head = detection_head_cascade

  roi_generator_obj = roi_generator.MultilevelROIGenerator(
      pre_nms_top_k=roi_generator_config.pre_nms_top_k,
      pre_nms_score_threshold=roi_generator_config.pre_nms_score_threshold,
      pre_nms_min_size_threshold=(
          roi_generator_config.pre_nms_min_size_threshold),
      nms_iou_threshold=roi_generator_config.nms_iou_threshold,
      num_proposals=roi_generator_config.num_proposals,
      test_pre_nms_top_k=roi_generator_config.test_pre_nms_top_k,
      test_pre_nms_score_threshold=(
          roi_generator_config.test_pre_nms_score_threshold),
      test_pre_nms_min_size_threshold=(
          roi_generator_config.test_pre_nms_min_size_threshold),
      test_nms_iou_threshold=roi_generator_config.test_nms_iou_threshold,
      test_num_proposals=roi_generator_config.test_num_proposals,
      use_batched_nms=roi_generator_config.use_batched_nms)

  roi_sampler_cascade = []
  roi_sampler_obj = roi_sampler.ROISampler(
      mix_gt_boxes=roi_sampler_config.mix_gt_boxes,
      num_sampled_rois=roi_sampler_config.num_sampled_rois,
      foreground_fraction=roi_sampler_config.foreground_fraction,
      foreground_iou_threshold=roi_sampler_config.foreground_iou_threshold,
      background_iou_high_threshold=(
          roi_sampler_config.background_iou_high_threshold),
      background_iou_low_threshold=(
          roi_sampler_config.background_iou_low_threshold))
  roi_sampler_cascade.append(roi_sampler_obj)

  # Initialize additional ROI samplers for cascade heads.
  if roi_sampler_config.cascade_iou_thresholds:
    for iou in roi_sampler_config.cascade_iou_thresholds:
      roi_sampler_obj = roi_sampler.ROISampler(
          mix_gt_boxes=False,
          num_sampled_rois=roi_sampler_config.num_sampled_rois,
          foreground_iou_threshold=iou,
          background_iou_high_threshold=iou,
          background_iou_low_threshold=0.0,
          skip_subsampling=True)
      roi_sampler_cascade.append(roi_sampler_obj)

  roi_aligner_obj = roi_aligner.MultilevelROIAligner(
      crop_size=roi_aligner_config.crop_size,
      sample_offset=roi_aligner_config.sample_offset)

  detection_generator_obj = detection_generator.DetectionGenerator(
      apply_nms=generator_config.apply_nms,
      pre_nms_top_k=generator_config.pre_nms_top_k,
      pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
      nms_iou_threshold=generator_config.nms_iou_threshold,
      max_num_detections=generator_config.max_num_detections,
      nms_version=generator_config.nms_version,
      use_cpu_nms=generator_config.use_cpu_nms,
      soft_nms_sigma=generator_config.soft_nms_sigma)

  if model_config.include_mask:
    mask_head = instance_heads.MaskHead(
        num_classes=model_config.num_classes,
        upsample_factor=model_config.mask_head.upsample_factor,
        num_convs=model_config.mask_head.num_convs,
        num_filters=model_config.mask_head.num_filters,
        use_separable_conv=model_config.mask_head.use_separable_conv,
        activation=model_config.norm_activation.activation,
        norm_momentum=model_config.norm_activation.norm_momentum,
        norm_epsilon=model_config.norm_activation.norm_epsilon,
        kernel_regularizer=l2_regularizer,
        class_agnostic=model_config.mask_head.class_agnostic)

    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=(
            model_config.mask_roi_aligner.crop_size *
            model_config.mask_head.upsample_factor),
        num_sampled_masks=model_config.mask_sampler.num_sampled_masks)

    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(
        crop_size=model_config.mask_roi_aligner.crop_size,
        sample_offset=model_config.mask_roi_aligner.sample_offset)
  else:
    mask_head = None
    mask_sampler_obj = None
    mask_roi_aligner_obj = None

  model = maskrcnn_model.MaskRCNNModel(
      backbone=backbone,
      decoder=decoder,
      rpn_head=rpn_head,
      detection_head=detection_head,
      roi_generator=roi_generator_obj,
      roi_sampler=roi_sampler_cascade,
      roi_aligner=roi_aligner_obj,
      detection_generator=detection_generator_obj,
      mask_head=mask_head,
      mask_sampler=mask_sampler_obj,
      mask_roi_aligner=mask_roi_aligner_obj,
      class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred,
      cascade_class_ensemble=detection_head_config.cascade_class_ensemble,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=model_config.anchor.num_scales,
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
  return model


def build_retinanet(
    input_specs: tf.keras.layers.InputSpec,
    model_config: retinanet_cfg.RetinaNet,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    backbone: Optional[tf.keras.Model] = None,
    decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Builds RetinaNet model."""
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)
  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  head_config = model_config.head
  generator_config = model_config.detection_generator
  num_anchors_per_location = (
      len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)

  head = dense_prediction_heads.RetinaNetHead(
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_classes=model_config.num_classes,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=head_config.num_convs,
      num_filters=head_config.num_filters,
      attribute_heads=[
          cfg.as_dict() for cfg in (head_config.attribute_heads or [])
      ],
      use_separable_conv=head_config.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  # Builds the decoder and head so that their trainable weights are
  # initialized.
  if decoder:
    decoder_features = decoder(backbone_features)
    _ = head(decoder_features)

  detection_generator_obj = detection_generator.MultilevelDetectionGenerator(
      apply_nms=generator_config.apply_nms,
      pre_nms_top_k=generator_config.pre_nms_top_k,
      pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
      nms_iou_threshold=generator_config.nms_iou_threshold,
      max_num_detections=generator_config.max_num_detections,
      nms_version=generator_config.nms_version,
      use_cpu_nms=generator_config.use_cpu_nms,
      soft_nms_sigma=generator_config.soft_nms_sigma)

  model = retinanet_model.RetinaNetModel(
      backbone,
      decoder,
      head,
      detection_generator_obj,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=model_config.anchor.num_scales,
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
  return model


def build_segmentation_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: segmentation_cfg.SemanticSegmentationModel,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    backbone: Optional[tf.keras.Model] = None,
    decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Builds Segmentation model."""
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  head_config = model_config.head
  head = segmentation_heads.SegmentationHead(
      num_classes=model_config.num_classes,
      level=head_config.level,
      num_convs=head_config.num_convs,
      prediction_kernel_size=head_config.prediction_kernel_size,
      num_filters=head_config.num_filters,
      use_depthwise_convolution=head_config.use_depthwise_convolution,
      upsample_factor=head_config.upsample_factor,
      feature_fusion=head_config.feature_fusion,
      low_level=head_config.low_level,
      low_level_num_filters=head_config.low_level_num_filters,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  mask_scoring_head = None
  if model_config.mask_scoring_head:
    mask_scoring_head = segmentation_heads.MaskScoring(
        num_classes=model_config.num_classes,
        **model_config.mask_scoring_head.as_dict(),
        activation=norm_activation_config.activation,
        use_sync_bn=norm_activation_config.use_sync_bn,
        norm_momentum=norm_activation_config.norm_momentum,
        norm_epsilon=norm_activation_config.norm_epsilon,
        kernel_regularizer=l2_regularizer)

  model = segmentation_model.SegmentationModel(
      backbone, decoder, head, mask_scoring_head=mask_scoring_head)
  return model
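
A minimal end-to-end sketch of the RetinaNet builder above (assuming tf-models-official; it mirrors `RetinaNetBuilderTest` in factory_test.py below, and the 640x640 input is illustrative):

import tensorflow as tf

from official.vision.configs import retinanet as retinanet_cfg
from official.vision.modeling import factory

model_config = retinanet_cfg.RetinaNet(num_classes=2)
input_specs = tf.keras.layers.InputSpec(shape=[None, 640, 640, 3])

# Builds backbone, decoder, head, and detection generator in one call.
model = factory.build_retinanet(
    input_specs=input_specs,
    model_config=model_config,
    l2_regularizer=tf.keras.regularizers.l2(5e-5))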
official/vision/modeling/factory_3d.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
# Import libraries
import tensorflow as tf

from official.core import registry
from official.vision.configs import video_classification as video_classification_cfg
from official.vision.modeling import video_classification_model
from official.vision.modeling import backbones

_REGISTERED_MODEL_CLS = {}


def register_model_builder(key: str):
  """Decorates a builder of model class.

  The builder should be a Callable (a class or a function).
  This decorator supports registration of model builders as follows:

  ```
  class MyModel(tf.keras.Model):
    pass

  @register_model_builder('mymodel')
  def builder(input_specs, config, num_classes, l2_reg):
    return MyModel(...)

  # Builds a MyModel object.
  my_model = build_model('mymodel', input_specs, config, num_classes, l2_reg)
  ```

  Args:
    key: the key to look up the builder.

  Returns:
    A callable for use as a class decorator that registers the decorated class
    for creation from an instance of model class.
  """
  return registry.register(_REGISTERED_MODEL_CLS, key)


def build_model(
    model_type: str,
    input_specs: tf.keras.layers.InputSpec,
    model_config: video_classification_cfg.hyperparams.Config,
    num_classes: int,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:
  """Builds a model from a config.

  Args:
    model_type: string name of model type. It should be consistent with
      ModelConfig.model_type.
    input_specs: tf.keras.layers.InputSpec.
    model_config: a OneOfConfig. Model config.
    num_classes: number of classes.
    l2_regularizer: tf.keras.regularizers.Regularizer instance. Default to
      None.

  Returns:
    tf.keras.Model instance of the model.
  """
  model_builder = registry.lookup(_REGISTERED_MODEL_CLS, model_type)
  return model_builder(input_specs, model_config, num_classes, l2_regularizer)


@register_model_builder('video_classification')
def build_video_classification_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: video_classification_cfg.VideoClassificationModel,
    num_classes: int,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:
  """Builds the video classification model."""
  input_specs_dict = {'image': input_specs}
  norm_activation_config = model_config.norm_activation
  backbone = backbones.factory.build_backbone(
      input_specs=input_specs,
      backbone_config=model_config.backbone,
      norm_activation_config=norm_activation_config,
      l2_regularizer=l2_regularizer)

  model = video_classification_model.VideoClassificationModel(
      backbone=backbone,
      num_classes=num_classes,
      input_specs=input_specs_dict,
      dropout_rate=model_config.dropout_rate,
      aggregate_endpoints=model_config.aggregate_endpoints,
      kernel_regularizer=l2_regularizer,
      require_endpoints=model_config.require_endpoints)
  return model
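
A minimal sketch of the generic entry point, which dispatches on `model_type` to the builder registered above (assuming tf-models-official; it mirrors `VideoClassificationModelBuilderTest` below, and the 8-frame 224x224 input is illustrative):

import tensorflow as tf

from official.vision.configs import backbones_3d
from official.vision.configs import video_classification as video_classification_cfg
from official.vision.modeling import factory_3d

model_config = video_classification_cfg.VideoClassificationModel(
    backbone=backbones_3d.Backbone3D(type='resnet_3d'))
input_specs = tf.keras.layers.InputSpec(shape=[None, 8, 224, 224, 3])

model = factory_3d.build_model(
    model_type='video_classification',
    input_specs=input_specs,
    model_config=model_config,
    num_classes=2,
    l2_regularizer=tf.keras.regularizers.l2(5e-5))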
official/vision/modeling/factory_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for factory.py."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.configs
import
backbones
from
official.vision.configs
import
backbones_3d
from
official.vision.configs
import
image_classification
as
classification_cfg
from
official.vision.configs
import
maskrcnn
as
maskrcnn_cfg
from
official.vision.configs
import
retinanet
as
retinanet_cfg
from
official.vision.configs
import
video_classification
as
video_classification_cfg
from
official.vision.modeling
import
factory
from
official.vision.modeling
import
factory_3d
class
ClassificationModelBuilderTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
'resnet'
,
(
224
,
224
),
5e-5
),
(
'resnet'
,
(
224
,
224
),
None
),
(
'resnet'
,
(
None
,
None
),
5e-5
),
(
'resnet'
,
(
None
,
None
),
None
),
)
def
test_builder
(
self
,
backbone_type
,
input_size
,
weight_decay
):
num_classes
=
2
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
input_size
[
0
],
input_size
[
1
],
3
])
model_config
=
classification_cfg
.
ImageClassificationModel
(
num_classes
=
num_classes
,
backbone
=
backbones
.
Backbone
(
type
=
backbone_type
))
l2_regularizer
=
(
tf
.
keras
.
regularizers
.
l2
(
weight_decay
)
if
weight_decay
else
None
)
_
=
factory
.
build_classification_model
(
input_specs
=
input_specs
,
model_config
=
model_config
,
l2_regularizer
=
l2_regularizer
)
class
MaskRCNNBuilderTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
'resnet'
,
(
640
,
640
)),
(
'resnet'
,
(
None
,
None
)),
)
def
test_builder
(
self
,
backbone_type
,
input_size
):
num_classes
=
2
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
input_size
[
0
],
input_size
[
1
],
3
])
model_config
=
maskrcnn_cfg
.
MaskRCNN
(
num_classes
=
num_classes
,
backbone
=
backbones
.
Backbone
(
type
=
backbone_type
))
l2_regularizer
=
tf
.
keras
.
regularizers
.
l2
(
5e-5
)
_
=
factory
.
build_maskrcnn
(
input_specs
=
input_specs
,
model_config
=
model_config
,
l2_regularizer
=
l2_regularizer
)
class
RetinaNetBuilderTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
'resnet'
,
(
640
,
640
),
False
),
(
'resnet'
,
(
None
,
None
),
True
),
)
def
test_builder
(
self
,
backbone_type
,
input_size
,
has_att_heads
):
num_classes
=
2
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
input_size
[
0
],
input_size
[
1
],
3
])
if
has_att_heads
:
attribute_heads_config
=
[
retinanet_cfg
.
AttributeHead
(
name
=
'att1'
),
retinanet_cfg
.
AttributeHead
(
name
=
'att2'
,
type
=
'classification'
,
size
=
2
),
]
else
:
attribute_heads_config
=
None
model_config
=
retinanet_cfg
.
RetinaNet
(
num_classes
=
num_classes
,
backbone
=
backbones
.
Backbone
(
type
=
backbone_type
),
head
=
retinanet_cfg
.
RetinaNetHead
(
attribute_heads
=
attribute_heads_config
))
l2_regularizer
=
tf
.
keras
.
regularizers
.
l2
(
5e-5
)
_
=
factory
.
build_retinanet
(
input_specs
=
input_specs
,
model_config
=
model_config
,
l2_regularizer
=
l2_regularizer
)
if
has_att_heads
:
self
.
assertEqual
(
model_config
.
head
.
attribute_heads
[
0
].
as_dict
(),
dict
(
name
=
'att1'
,
type
=
'regression'
,
size
=
1
))
self
.
assertEqual
(
model_config
.
head
.
attribute_heads
[
1
].
as_dict
(),
dict
(
name
=
'att2'
,
type
=
'classification'
,
size
=
2
))
class VideoClassificationModelBuilderTest(parameterized.TestCase,
                                          tf.test.TestCase):

  @parameterized.parameters(
      ('resnet_3d', (8, 224, 224), 5e-5),
      ('resnet_3d', (None, None, None), 5e-5),
  )
  def test_builder(self, backbone_type, input_size, weight_decay):
    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size[0], input_size[1], input_size[2], 3])
    model_config = video_classification_cfg.VideoClassificationModel(
        backbone=backbones_3d.Backbone3D(type=backbone_type))
    l2_regularizer = (
        tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
    _ = factory_3d.build_video_classification_model(
        input_specs=input_specs,
        model_config=model_config,
        num_classes=2,
        l2_regularizer=l2_regularizer)


if __name__ == '__main__':
  tf.test.main()
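For orientation between the test files: a minimal sketch of driving the same factory entry point outside a test harness. The input size, class count, and weight-decay value below are illustrative assumptions, not values fixed by this commit.

# Minimal sketch (not part of this commit): build a RetinaNet via the factory,
# mirroring RetinaNetBuilderTest above. All numeric values are illustrative.
import tensorflow as tf

from official.vision.configs import backbones
from official.vision.configs import retinanet as retinanet_cfg
from official.vision.modeling import factory

input_specs = tf.keras.layers.InputSpec(shape=[None, 640, 640, 3])
model_config = retinanet_cfg.RetinaNet(
    num_classes=91,  # Illustrative, e.g. a COCO-style label space.
    backbone=backbones.Backbone(type='resnet'))
model = factory.build_retinanet(
    input_specs=input_specs,
    model_config=model_config,
    l2_regularizer=tf.keras.regularizers.l2(5e-5))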
official/vision/modeling/heads/__init__.py
0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Heads package definition."""
from official.vision.modeling.heads.dense_prediction_heads import RetinaNetHead
from official.vision.modeling.heads.dense_prediction_heads import RPNHead
from official.vision.modeling.heads.instance_heads import DetectionHead
from official.vision.modeling.heads.instance_heads import MaskHead
from official.vision.modeling.heads.segmentation_heads import SegmentationHead
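These re-exports exist so that callers can import the heads from the package root instead of the defining modules; a one-line sketch of the resulting aliases:

# Equivalent to importing from dense_prediction_heads directly:
from official.vision.modeling.heads import RetinaNetHead, RPNHead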
official/vision/modeling/heads/dense_prediction_heads.py
0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of dense prediction heads."""
from typing import Any, Dict, List, Mapping, Optional, Union

# Import libraries
import numpy as np
import tensorflow as tf

from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class RetinaNetHead(tf.keras.layers.Layer):
  """Creates a RetinaNet head."""
  def __init__(
      self,
      min_level: int,
      max_level: int,
      num_classes: int,
      num_anchors_per_location: int,
      num_convs: int = 4,
      num_filters: int = 256,
      attribute_heads: Optional[List[Dict[str, Any]]] = None,
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      num_params_per_anchor: int = 4,
      **kwargs):
    """Initializes a RetinaNet head.

    Args:
      min_level: An `int` number of minimum feature level.
      max_level: An `int` number of maximum feature level.
      num_classes: An `int` number of classes to predict.
      num_anchors_per_location: An `int` number of anchors per pixel location.
      num_convs: An `int` number that represents the number of the intermediate
        conv layers before the prediction.
      num_filters: An `int` number that represents the number of filters of the
        intermediate conv layers.
      attribute_heads: If not None, a list that contains a dict for each
        additional attribute head. Each dict consists of 3 key-value pairs:
        `name`, `type` ('regression' or 'classification'), and `size` (number
        of predicted values for each instance).
      use_separable_conv: A `bool` that indicates whether separable
        convolution layers are used.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      num_params_per_anchor: Number of parameters required to specify an anchor
        box. For example, `num_params_per_anchor` would be 4 for axis-aligned
        anchor boxes specified by their y-centers, x-centers, heights, and
        widths.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(RetinaNetHead, self).__init__(**kwargs)
    self._config_dict = {
        'min_level': min_level,
        'max_level': max_level,
        'num_classes': num_classes,
        'num_anchors_per_location': num_anchors_per_location,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'attribute_heads': attribute_heads,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'num_params_per_anchor': num_params_per_anchor,
    }
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)
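  # An `attribute_heads` entry is a plain dict; a hypothetical single-attribute
  # configuration (matching the unit test for this head) would be:
  #   attribute_heads=[{'name': 'depth', 'type': 'regression', 'size': 1}]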
  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the head."""
    conv_op = (tf.keras.layers.SeparableConv2D
               if self._config_dict['use_separable_conv']
               else tf.keras.layers.Conv2D)
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      conv_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=0.01),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }
    # Class net.
    self._cls_convs = []
    self._cls_norms = []
    for level in range(self._config_dict['min_level'],
                       self._config_dict['max_level'] + 1):
      this_level_cls_norms = []
      for i in range(self._config_dict['num_convs']):
        if level == self._config_dict['min_level']:
          cls_conv_name = 'classnet-conv_{}'.format(i)
          self._cls_convs.append(conv_op(name=cls_conv_name, **conv_kwargs))
        cls_norm_name = 'classnet-conv-norm_{}_{}'.format(level, i)
        this_level_cls_norms.append(bn_op(name=cls_norm_name, **bn_kwargs))
      self._cls_norms.append(this_level_cls_norms)
    classifier_kwargs = {
        'filters': (self._config_dict['num_classes'] *
                    self._config_dict['num_anchors_per_location']),
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.constant_initializer(
            -np.log((1 - 0.01) / 0.01)),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      classifier_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._classifier = conv_op(name='scores', **classifier_kwargs)
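    # Note: the constant bias above equals -log((1 - pi) / pi) with pi = 0.01,
    # i.e. about -4.6, so every anchor starts with a foreground probability of
    # roughly 0.01, the classification-subnet prior from the focal loss
    # (RetinaNet) paper, which keeps early training stable.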
    # Box net.
    self._box_convs = []
    self._box_norms = []
    for level in range(self._config_dict['min_level'],
                       self._config_dict['max_level'] + 1):
      this_level_box_norms = []
      for i in range(self._config_dict['num_convs']):
        if level == self._config_dict['min_level']:
          box_conv_name = 'boxnet-conv_{}'.format(i)
          self._box_convs.append(conv_op(name=box_conv_name, **conv_kwargs))
        box_norm_name = 'boxnet-conv-norm_{}_{}'.format(level, i)
        this_level_box_norms.append(bn_op(name=box_norm_name, **bn_kwargs))
      self._box_norms.append(this_level_box_norms)

    box_regressor_kwargs = {
        'filters': (self._config_dict['num_params_per_anchor'] *
                    self._config_dict['num_anchors_per_location']),
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      box_regressor_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._box_regressor = conv_op(name='boxes', **box_regressor_kwargs)
    # Attribute learning nets.
    if self._config_dict['attribute_heads']:
      self._att_predictors = {}
      self._att_convs = {}
      self._att_norms = {}
      for att_config in self._config_dict['attribute_heads']:
        att_name = att_config['name']
        att_type = att_config['type']
        att_size = att_config['size']
        att_convs_i = []
        att_norms_i = []

        # Build conv and norm layers.
        for level in range(self._config_dict['min_level'],
                           self._config_dict['max_level'] + 1):
          this_level_att_norms = []
          for i in range(self._config_dict['num_convs']):
            if level == self._config_dict['min_level']:
              att_conv_name = '{}-conv_{}'.format(att_name, i)
              att_convs_i.append(conv_op(name=att_conv_name, **conv_kwargs))
            att_norm_name = '{}-conv-norm_{}_{}'.format(att_name, level, i)
            this_level_att_norms.append(
                bn_op(name=att_norm_name, **bn_kwargs))
          att_norms_i.append(this_level_att_norms)
        self._att_convs[att_name] = att_convs_i
        self._att_norms[att_name] = att_norms_i

        # Build the final prediction layer.
        att_predictor_kwargs = {
            'filters': (att_size *
                        self._config_dict['num_anchors_per_location']),
            'kernel_size': 3,
            'padding': 'same',
            'bias_initializer': tf.zeros_initializer(),
            'bias_regularizer': self._config_dict['bias_regularizer'],
        }
        if att_type == 'regression':
          att_predictor_kwargs.update(
              {'bias_initializer': tf.zeros_initializer()})
        elif att_type == 'classification':
          att_predictor_kwargs.update({
              'bias_initializer': tf.constant_initializer(
                  -np.log((1 - 0.01) / 0.01))
          })
        else:
          raise ValueError(
              'Attribute head type {} not supported.'.format(att_type))
        if not self._config_dict['use_separable_conv']:
          att_predictor_kwargs.update({
              'kernel_initializer': tf.keras.initializers.RandomNormal(
                  stddev=1e-5),
              'kernel_regularizer': self._config_dict['kernel_regularizer'],
          })
        self._att_predictors[att_name] = conv_op(
            name='{}_attributes'.format(att_name), **att_predictor_kwargs)

    super(RetinaNetHead, self).build(input_shape)
  def call(self, features: Mapping[str, tf.Tensor]):
    """Forward pass of the RetinaNet head.

    Args:
      features: A `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor`, the feature map tensors, whose shape is
            [batch, height_l, width_l, channels].

    Returns:
      scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the class scores predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l, num_classes * num_anchors_per_location].
      boxes: A `dict` of `tf.Tensor` which includes coordinates of the
        predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box coordinates predicted from a
            particular feature level, whose shape is
            [batch, height_l, width_l,
             num_params_per_anchor * num_anchors_per_location].
      attributes: a dict of (attribute_name, attribute_prediction). Each
        `attribute_prediction` is a dict of:
        - key: `str`, the level of the multilevel predictions.
        - values: `Tensor`, the attribute values predicted from a particular
            feature level, whose shape is
            [batch, height_l, width_l,
             attribute_size * num_anchors_per_location].
        Can be an empty dictionary if no attribute learning is required.
    """
    scores = {}
    boxes = {}
    if self._config_dict['attribute_heads']:
      attributes = {
          att_config['name']: {}
          for att_config in self._config_dict['attribute_heads']
      }
    else:
      attributes = {}

    for i, level in enumerate(
        range(self._config_dict['min_level'],
              self._config_dict['max_level'] + 1)):
      this_level_features = features[str(level)]

      # class net.
      x = this_level_features
      for conv, norm in zip(self._cls_convs, self._cls_norms[i]):
        x = conv(x)
        x = norm(x)
        x = self._activation(x)
      scores[str(level)] = self._classifier(x)

      # box net.
      x = this_level_features
      for conv, norm in zip(self._box_convs, self._box_norms[i]):
        x = conv(x)
        x = norm(x)
        x = self._activation(x)
      boxes[str(level)] = self._box_regressor(x)

      # attribute nets.
      if self._config_dict['attribute_heads']:
        for att_config in self._config_dict['attribute_heads']:
          att_name = att_config['name']
          x = this_level_features
          for conv, norm in zip(self._att_convs[att_name],
                                self._att_norms[att_name][i]):
            x = conv(x)
            x = norm(x)
            x = self._activation(x)
          attributes[att_name][str(level)] = self._att_predictors[att_name](x)

    return scores, boxes, attributes
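  # Example shapes (hypothetical, mirroring the unit test below): with
  # num_classes=3, num_anchors_per_location=3 and num_params_per_anchor=4,
  # a level-'3' feature map of shape [2, 128, 128, 16] produces
  # scores['3'] of shape [2, 128, 128, 9] and boxes['3'] of [2, 128, 128, 12].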
  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class RPNHead(tf.keras.layers.Layer):
  """Creates a Region Proposal Network (RPN) head."""
  def __init__(
      self,
      min_level: int,
      max_level: int,
      num_anchors_per_location: int,
      num_convs: int = 1,
      num_filters: int = 256,
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a Region Proposal Network head.

    Args:
      min_level: An `int` number of minimum feature level.
      max_level: An `int` number of maximum feature level.
      num_anchors_per_location: An `int` number of anchors per pixel location.
      num_convs: An `int` number that represents the number of the intermediate
        convolution layers before the prediction.
      num_filters: An `int` number that represents the number of filters of the
        intermediate convolution layers.
      use_separable_conv: A `bool` that indicates whether separable
        convolution layers are used.
      activation: A `str` that indicates which activation is used, e.g. 'relu',
        'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(RPNHead, self).__init__(**kwargs)
    self._config_dict = {
        'min_level': min_level,
        'max_level': max_level,
        'num_anchors_per_location': num_anchors_per_location,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)
  def build(self, input_shape):
    """Creates the variables of the head."""
    conv_op = (tf.keras.layers.SeparableConv2D
               if self._config_dict['use_separable_conv']
               else tf.keras.layers.Conv2D)
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      conv_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=0.01),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    self._convs = []
    self._norms = []
    for level in range(self._config_dict['min_level'],
                       self._config_dict['max_level'] + 1):
      this_level_norms = []
      for i in range(self._config_dict['num_convs']):
        if level == self._config_dict['min_level']:
          conv_name = 'rpn-conv_{}'.format(i)
          self._convs.append(conv_op(name=conv_name, **conv_kwargs))
        norm_name = 'rpn-conv-norm_{}_{}'.format(level, i)
        this_level_norms.append(bn_op(name=norm_name, **bn_kwargs))
      self._norms.append(this_level_norms)

    classifier_kwargs = {
        'filters': self._config_dict['num_anchors_per_location'],
        'kernel_size': 1,
        'padding': 'valid',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      classifier_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._classifier = conv_op(name='rpn-scores', **classifier_kwargs)

    box_regressor_kwargs = {
        'filters': 4 * self._config_dict['num_anchors_per_location'],
        'kernel_size': 1,
        'padding': 'valid',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      box_regressor_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._box_regressor = conv_op(name='rpn-boxes', **box_regressor_kwargs)

    super(RPNHead, self).build(input_shape)
  def call(self, features: Mapping[str, tf.Tensor]):
    """Forward pass of the RPN head.

    Args:
      features: A `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor`, the feature map tensors, whose shape is
            [batch, height_l, width_l, channels].

    Returns:
      scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the objectness scores predicted from a
            particular feature level, whose shape is
            [batch, height_l, width_l, num_anchors_per_location].
      boxes: A `dict` of `tf.Tensor` which includes coordinates of the
        predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box coordinates predicted from a
            particular feature level, whose shape is
            [batch, height_l, width_l, 4 * num_anchors_per_location].
    """
    scores = {}
    boxes = {}
    for i, level in enumerate(
        range(self._config_dict['min_level'],
              self._config_dict['max_level'] + 1)):
      x = features[str(level)]
      for conv, norm in zip(self._convs, self._norms[i]):
        x = conv(x)
        x = norm(x)
        x = self._activation(x)
      scores[str(level)] = self._classifier(x)
      boxes[str(level)] = self._box_regressor(x)
    return scores, boxes
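  # Note: the RPN classifier is class-agnostic; it predicts one objectness
  # logit per anchor, so scores carry num_anchors_per_location channels
  # (contrast with the RetinaNet head, which emits
  # num_classes * num_anchors_per_location).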
  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
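A brief sketch, not part of this commit, of what the `register_keras_serializable(package='Vision')` decorators above buy: once this module has been imported (so registration has run), the heads can round-trip through Keras' generic layer (de)serialization.

# Sketch: Keras-level round trip enabled by register_keras_serializable.
import tensorflow as tf

from official.vision.modeling.heads import dense_prediction_heads

head = dense_prediction_heads.RPNHead(
    min_level=3, max_level=4, num_anchors_per_location=3)
serialized = tf.keras.layers.serialize(head)  # Registered as 'Vision>RPNHead'.
restored = tf.keras.layers.deserialize(serialized)
assert restored.get_config() == head.get_config()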
official/vision/modeling/heads/dense_prediction_heads_test.py
0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for dense_prediction_heads.py."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from official.vision.modeling.heads import dense_prediction_heads
class RetinaNetHeadTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (False, False, False),
      (False, True, False),
      (True, False, True),
      (True, True, True),
  )
  def test_forward(self, use_separable_conv, use_sync_bn, has_att_heads):
    if has_att_heads:
      attribute_heads = [dict(name='depth', type='regression', size=1)]
    else:
      attribute_heads = None
    retinanet_head = dense_prediction_heads.RetinaNetHead(
        min_level=3,
        max_level=4,
        num_classes=3,
        num_anchors_per_location=3,
        num_convs=2,
        num_filters=256,
        attribute_heads=attribute_heads,
        use_separable_conv=use_separable_conv,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
    }
    scores, boxes, attributes = retinanet_head(features)
    self.assertAllEqual(scores['3'].numpy().shape, [2, 128, 128, 9])
    self.assertAllEqual(scores['4'].numpy().shape, [2, 64, 64, 9])
    self.assertAllEqual(boxes['3'].numpy().shape, [2, 128, 128, 12])
    self.assertAllEqual(boxes['4'].numpy().shape, [2, 64, 64, 12])
    if has_att_heads:
      for att in attributes.values():
        self.assertAllEqual(att['3'].numpy().shape, [2, 128, 128, 3])
        self.assertAllEqual(att['4'].numpy().shape, [2, 64, 64, 3])
  def test_serialize_deserialize(self):
    retinanet_head = dense_prediction_heads.RetinaNetHead(
        min_level=3,
        max_level=7,
        num_classes=3,
        num_anchors_per_location=9,
        num_convs=2,
        num_filters=16,
        attribute_heads=None,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    config = retinanet_head.get_config()
    new_retinanet_head = (
        dense_prediction_heads.RetinaNetHead.from_config(config))
    self.assertAllEqual(
        retinanet_head.get_config(), new_retinanet_head.get_config())
class RpnHeadTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (False, False),
      (False, True),
      (True, False),
      (True, True),
  )
  def test_forward(self, use_separable_conv, use_sync_bn):
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=3,
        max_level=4,
        num_anchors_per_location=3,
        num_convs=2,
        num_filters=256,
        use_separable_conv=use_separable_conv,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
    }
    scores, boxes = rpn_head(features)
    self.assertAllEqual(scores['3'].numpy().shape, [2, 128, 128, 3])
    self.assertAllEqual(scores['4'].numpy().shape, [2, 64, 64, 3])
    self.assertAllEqual(boxes['3'].numpy().shape, [2, 128, 128, 12])
    self.assertAllEqual(boxes['4'].numpy().shape, [2, 64, 64, 12])
  def test_serialize_deserialize(self):
    rpn_head = dense_prediction_heads.RPNHead(
        min_level=3,
        max_level=7,
        num_anchors_per_location=9,
        num_convs=2,
        num_filters=16,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    config = rpn_head.get_config()
    new_rpn_head = dense_prediction_heads.RPNHead.from_config(config)
    self.assertAllEqual(rpn_head.get_config(), new_rpn_head.get_config())
if __name__ == '__main__':
  tf.test.main()
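Assuming a standard Model Garden checkout with TensorFlow and its requirements installed, this test file can typically be run directly, e.g. `python3 official/vision/modeling/heads/dense_prediction_heads_test.py`; `tf.test.main()` then discovers and runs both test classes above.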