ModelZoo / ResNet50_tensorflow / Commits

Commit 0225b135 (unverified)
Authored Mar 05, 2022 by Srihari Humbarwadi; committed by GitHub on Mar 05, 2022.
Parents: 7479dbb8, 4c571a3c

    Merge branch 'tensorflow:master' into panoptic-deeplab-modeling

Changes: 332 in the full commit; this page shows 20 changed files with 4049 additions and 0 deletions (+4049, -0).
official/vision/modeling/backbones/resnet_deeplab.py          +366   -0
official/vision/modeling/backbones/resnet_deeplab_test.py     +144   -0
official/vision/modeling/backbones/resnet_test.py             +156   -0
official/vision/modeling/backbones/revnet.py                  +233   -0
official/vision/modeling/backbones/revnet_test.py             +92    -0
official/vision/modeling/backbones/spinenet.py                +572   -0
official/vision/modeling/backbones/spinenet_mobile.py         +539   -0
official/vision/modeling/backbones/spinenet_mobile_test.py    +112   -0
official/vision/modeling/backbones/spinenet_test.py           +128   -0
official/vision/modeling/classification_model.py              +122   -0
official/vision/modeling/classification_model_test.py         +184   -0
official/vision/modeling/decoders/__init__.py                 +20    -0
official/vision/modeling/decoders/aspp.py                     +203   -0
official/vision/modeling/decoders/aspp_test.py                +94    -0
official/vision/modeling/decoders/factory.py                  +135   -0
official/vision/modeling/decoders/factory_test.py             +159   -0
official/vision/modeling/decoders/fpn.py                      +246   -0
official/vision/modeling/decoders/fpn_test.py                 +117   -0
official/vision/modeling/decoders/nasfpn.py                   +368   -0
official/vision/modeling/decoders/nasfpn_test.py              +59    -0
official/vision/modeling/backbones/resnet_deeplab.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of Residual Networks with Deeplab modifications."""
from
typing
import
Callable
,
Optional
,
Tuple
,
List
import
numpy
as
np
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.modeling
import
tf_utils
from
official.vision.modeling.backbones
import
factory
from
official.vision.modeling.layers
import
nn_blocks
from
official.vision.modeling.layers
import
nn_layers
layers
=
tf
.
keras
.
layers
# Specifications for different ResNet variants.
# Each entry specifies block configurations of the particular ResNet variant.
# Each element in the block configuration is in the following format:
# (block_fn, num_filters, block_repeats)
RESNET_SPECS
=
{
50
:
[
(
'bottleneck'
,
64
,
3
),
(
'bottleneck'
,
128
,
4
),
(
'bottleneck'
,
256
,
6
),
(
'bottleneck'
,
512
,
3
),
],
101
:
[
(
'bottleneck'
,
64
,
3
),
(
'bottleneck'
,
128
,
4
),
(
'bottleneck'
,
256
,
23
),
(
'bottleneck'
,
512
,
3
),
],
}
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
DilatedResNet
(
tf
.
keras
.
Model
):
"""Creates a ResNet model with Deeplabv3 modifications.
This backbone is suitable for semantic segmentation. This implements
Liang-Chieh Chen, George Papandreou, Florian Schroff, Hartwig Adam.
Rethinking Atrous Convolution for Semantic Image Segmentation.
(https://arxiv.org/pdf/1706.05587)
"""
def
__init__
(
self
,
model_id
:
int
,
output_stride
:
int
,
input_specs
:
tf
.
keras
.
layers
.
InputSpec
=
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
stem_type
:
str
=
'v0'
,
se_ratio
:
Optional
[
float
]
=
None
,
init_stochastic_depth_rate
:
float
=
0.0
,
multigrid
:
Optional
[
Tuple
[
int
]]
=
None
,
last_stage_repeats
:
int
=
1
,
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
**
kwargs
):
"""Initializes a ResNet model with DeepLab modification.
Args:
model_id: An `int` specifies depth of ResNet backbone model.
output_stride: An `int` of output stride, ratio of input to output
resolution.
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
stem_type: A `str` of stem type. Can be `v0` or `v1`. `v1` replaces 7x7
conv by 3 3x3 convs.
se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
multigrid: A tuple of the same length as the number of blocks in the last
resnet stage.
last_stage_repeats: An `int` that specifies how many times last stage is
repeated.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_model_id
=
model_id
self
.
_output_stride
=
output_stride
self
.
_input_specs
=
input_specs
self
.
_use_sync_bn
=
use_sync_bn
self
.
_activation
=
activation
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
if
use_sync_bn
:
self
.
_norm
=
layers
.
experimental
.
SyncBatchNormalization
else
:
self
.
_norm
=
layers
.
BatchNormalization
self
.
_kernel_initializer
=
kernel_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_bias_regularizer
=
bias_regularizer
self
.
_stem_type
=
stem_type
self
.
_se_ratio
=
se_ratio
self
.
_init_stochastic_depth_rate
=
init_stochastic_depth_rate
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_last'
:
bn_axis
=
-
1
else
:
bn_axis
=
1
# Build ResNet.
inputs
=
tf
.
keras
.
Input
(
shape
=
input_specs
.
shape
[
1
:])
if
stem_type
==
'v0'
:
x
=
layers
.
Conv2D
(
filters
=
64
,
kernel_size
=
7
,
strides
=
2
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
bn_axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
elif
stem_type
==
'v1'
:
x
=
layers
.
Conv2D
(
filters
=
64
,
kernel_size
=
3
,
strides
=
2
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
bn_axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
x
=
layers
.
Conv2D
(
filters
=
64
,
kernel_size
=
3
,
strides
=
1
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
x
)
x
=
self
.
_norm
(
axis
=
bn_axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
x
=
layers
.
Conv2D
(
filters
=
128
,
kernel_size
=
3
,
strides
=
1
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
x
)
x
=
self
.
_norm
(
axis
=
bn_axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
else
:
raise
ValueError
(
'Stem type {} not supported.'
.
format
(
stem_type
))
x
=
layers
.
MaxPool2D
(
pool_size
=
3
,
strides
=
2
,
padding
=
'same'
)(
x
)
normal_resnet_stage
=
int
(
np
.
math
.
log2
(
self
.
_output_stride
))
-
2
endpoints
=
{}
for
i
in
range
(
normal_resnet_stage
+
1
):
spec
=
RESNET_SPECS
[
model_id
][
i
]
if
spec
[
0
]
==
'bottleneck'
:
block_fn
=
nn_blocks
.
BottleneckBlock
else
:
raise
ValueError
(
'Block fn `{}` is not supported.'
.
format
(
spec
[
0
]))
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
spec
[
1
],
strides
=
(
1
if
i
==
0
else
2
),
dilation_rate
=
1
,
block_fn
=
block_fn
,
block_repeats
=
spec
[
2
],
stochastic_depth_drop_rate
=
nn_layers
.
get_stochastic_depth_rate
(
self
.
_init_stochastic_depth_rate
,
i
+
2
,
4
+
last_stage_repeats
),
name
=
'block_group_l{}'
.
format
(
i
+
2
))
endpoints
[
str
(
i
+
2
)]
=
x
dilation_rate
=
2
for
i
in
range
(
normal_resnet_stage
+
1
,
3
+
last_stage_repeats
):
spec
=
RESNET_SPECS
[
model_id
][
i
]
if
i
<
3
else
RESNET_SPECS
[
model_id
][
-
1
]
if
spec
[
0
]
==
'bottleneck'
:
block_fn
=
nn_blocks
.
BottleneckBlock
else
:
raise
ValueError
(
'Block fn `{}` is not supported.'
.
format
(
spec
[
0
]))
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
spec
[
1
],
strides
=
1
,
dilation_rate
=
dilation_rate
,
block_fn
=
block_fn
,
block_repeats
=
spec
[
2
],
stochastic_depth_drop_rate
=
nn_layers
.
get_stochastic_depth_rate
(
self
.
_init_stochastic_depth_rate
,
i
+
2
,
4
+
last_stage_repeats
),
multigrid
=
multigrid
if
i
>=
3
else
None
,
name
=
'block_group_l{}'
.
format
(
i
+
2
))
dilation_rate
*=
2
endpoints
[
str
(
normal_resnet_stage
+
2
)]
=
x
self
.
_output_specs
=
{
l
:
endpoints
[
l
].
get_shape
()
for
l
in
endpoints
}
super
(
DilatedResNet
,
self
).
__init__
(
inputs
=
inputs
,
outputs
=
endpoints
,
**
kwargs
)
def
_block_group
(
self
,
inputs
:
tf
.
Tensor
,
filters
:
int
,
strides
:
int
,
dilation_rate
:
int
,
block_fn
:
Callable
[...,
tf
.
keras
.
layers
.
Layer
],
block_repeats
:
int
=
1
,
stochastic_depth_drop_rate
:
float
=
0.0
,
multigrid
:
Optional
[
List
[
int
]]
=
None
,
name
:
str
=
'block_group'
):
"""Creates one group of blocks for the ResNet model.
Deeplab applies strides at the last block.
Args:
inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
filters: An `int` off number of filters for the first convolution of the
layer.
strides: An `int` of stride to use for the first convolution of the layer.
If greater than 1, this layer will downsample the input.
dilation_rate: An `int` of diluted convolution rates.
block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`.
block_repeats: An `int` of number of blocks contained in the layer.
stochastic_depth_drop_rate: A `float` of drop rate of the current block
group.
multigrid: A list of `int` or None. If specified, dilation rates for each
block is scaled up by its corresponding factor in the multigrid.
name: A `str` name for the block.
Returns:
The output `tf.Tensor` of the block layer.
"""
if
multigrid
is
not
None
and
len
(
multigrid
)
!=
block_repeats
:
raise
ValueError
(
'multigrid has to match number of block_repeats'
)
if
multigrid
is
None
:
multigrid
=
[
1
]
*
block_repeats
# TODO(arashwan): move striding at the of the block.
x
=
block_fn
(
filters
=
filters
,
strides
=
strides
,
dilation_rate
=
dilation_rate
*
multigrid
[
0
],
use_projection
=
True
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
,
se_ratio
=
self
.
_se_ratio
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
,
activation
=
self
.
_activation
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
inputs
)
for
i
in
range
(
1
,
block_repeats
):
x
=
block_fn
(
filters
=
filters
,
strides
=
1
,
dilation_rate
=
dilation_rate
*
multigrid
[
i
],
use_projection
=
False
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
,
se_ratio
=
self
.
_se_ratio
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
,
activation
=
self
.
_activation
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
x
)
return
tf
.
identity
(
x
,
name
=
name
)
def
get_config
(
self
):
config_dict
=
{
'model_id'
:
self
.
_model_id
,
'output_stride'
:
self
.
_output_stride
,
'stem_type'
:
self
.
_stem_type
,
'se_ratio'
:
self
.
_se_ratio
,
'init_stochastic_depth_rate'
:
self
.
_init_stochastic_depth_rate
,
'activation'
:
self
.
_activation
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
}
return
config_dict
@
classmethod
def
from_config
(
cls
,
config
,
custom_objects
=
None
):
return
cls
(
**
config
)
@
property
def
output_specs
(
self
):
"""A dict of {level: TensorShape} pairs for the model output."""
return
self
.
_output_specs
@
factory
.
register_backbone_builder
(
'dilated_resnet'
)
def
build_dilated_resnet
(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
backbone_config
:
hyperparams
.
Config
,
norm_activation_config
:
hyperparams
.
Config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
# pytype: disable=annotation-type-mismatch # typed-keras
"""Builds ResNet backbone from a config."""
backbone_type
=
backbone_config
.
type
backbone_cfg
=
backbone_config
.
get
()
assert
backbone_type
==
'dilated_resnet'
,
(
f
'Inconsistent backbone type '
f
'
{
backbone_type
}
'
)
return
DilatedResNet
(
model_id
=
backbone_cfg
.
model_id
,
output_stride
=
backbone_cfg
.
output_stride
,
input_specs
=
input_specs
,
stem_type
=
backbone_cfg
.
stem_type
,
se_ratio
=
backbone_cfg
.
se_ratio
,
init_stochastic_depth_rate
=
backbone_cfg
.
stochastic_depth_drop_rate
,
multigrid
=
backbone_cfg
.
multigrid
,
last_stage_repeats
=
backbone_cfg
.
last_stage_repeats
,
activation
=
norm_activation_config
.
activation
,
use_sync_bn
=
norm_activation_config
.
use_sync_bn
,
norm_momentum
=
norm_activation_config
.
norm_momentum
,
norm_epsilon
=
norm_activation_config
.
norm_epsilon
,
kernel_regularizer
=
l2_regularizer
)
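
For reviewers skimming the diff, a minimal usage sketch of the new backbone (not part of this commit; it assumes the Model Garden `official` package is importable and mirrors the assertions in resnet_deeplab_test.py below):

# Usage sketch (not in this commit): build the DeepLab-style backbone at
# output stride 16 and inspect its endpoints.
import numpy as np
import tensorflow as tf

from official.vision.modeling.backbones import resnet_deeplab

backbone = resnet_deeplab.DilatedResNet(model_id=50, output_stride=16)
images = tf.keras.Input(shape=(512, 512, 3), batch_size=1)
endpoints = backbone(images)

# The dilated final stage is keyed by log2(output_stride): '4' for stride 16.
level = str(int(np.math.log2(16)))
print(endpoints[level].shape)  # (1, 32, 32, 2048) for ResNet-50.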
official/vision/modeling/backbones/resnet_deeplab_test.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for resnet_deeplab models."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.python.distribute
import
combinations
from
tensorflow.python.distribute
import
strategy_combinations
from
official.vision.modeling.backbones
import
resnet_deeplab
class
ResNetTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
128
,
50
,
4
,
8
),
(
128
,
101
,
4
,
8
),
(
128
,
50
,
4
,
16
),
(
128
,
101
,
4
,
16
),
)
def
test_network_creation
(
self
,
input_size
,
model_id
,
endpoint_filter_scale
,
output_stride
):
"""Test creation of ResNet models."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
resnet_deeplab
.
DilatedResNet
(
model_id
=
model_id
,
output_stride
=
output_stride
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
endpoints
=
network
(
inputs
)
print
(
endpoints
)
self
.
assertAllEqual
([
1
,
input_size
/
output_stride
,
input_size
/
output_stride
,
512
*
endpoint_filter_scale
],
endpoints
[
str
(
int
(
np
.
math
.
log2
(
output_stride
)))].
shape
.
as_list
())
@
parameterized
.
parameters
(
(
'v0'
,
None
,
0.0
),
(
'v1'
,
None
,
0.0
),
(
'v1'
,
0.25
,
0.0
),
(
'v1'
,
0.25
,
0.2
),
)
def
test_network_features
(
self
,
stem_type
,
se_ratio
,
init_stochastic_depth_rate
):
"""Test additional features of ResNet models."""
input_size
=
128
model_id
=
50
endpoint_filter_scale
=
4
output_stride
=
8
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
resnet_deeplab
.
DilatedResNet
(
model_id
=
model_id
,
output_stride
=
output_stride
,
stem_type
=
stem_type
,
se_ratio
=
se_ratio
,
init_stochastic_depth_rate
=
init_stochastic_depth_rate
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
endpoints
=
network
(
inputs
)
print
(
endpoints
)
self
.
assertAllEqual
([
1
,
input_size
/
output_stride
,
input_size
/
output_stride
,
512
*
endpoint_filter_scale
],
endpoints
[
str
(
int
(
np
.
math
.
log2
(
output_stride
)))].
shape
.
as_list
())
@
combinations
.
generate
(
combinations
.
combine
(
strategy
=
[
strategy_combinations
.
cloud_tpu_strategy
,
strategy_combinations
.
one_device_strategy_gpu
,
],
use_sync_bn
=
[
False
,
True
],
))
def
test_sync_bn_multiple_devices
(
self
,
strategy
,
use_sync_bn
):
"""Test for sync bn on TPU and GPU devices."""
inputs
=
np
.
random
.
rand
(
64
,
128
,
128
,
3
)
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
with
strategy
.
scope
():
network
=
resnet_deeplab
.
DilatedResNet
(
model_id
=
50
,
output_stride
=
8
,
use_sync_bn
=
use_sync_bn
)
_
=
network
(
inputs
)
@
parameterized
.
parameters
(
1
,
3
,
4
)
def
test_input_specs
(
self
,
input_dim
):
"""Test different input feature dimensions."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
input_dim
])
network
=
resnet_deeplab
.
DilatedResNet
(
model_id
=
50
,
output_stride
=
8
,
input_specs
=
input_specs
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
128
,
128
,
input_dim
),
batch_size
=
1
)
_
=
network
(
inputs
)
def
test_serialize_deserialize
(
self
):
# Create a network object that sets all of its config options.
kwargs
=
dict
(
model_id
=
50
,
output_stride
=
8
,
stem_type
=
'v0'
,
se_ratio
=
0.25
,
init_stochastic_depth_rate
=
0.2
,
use_sync_bn
=
False
,
activation
=
'relu'
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'VarianceScaling'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
)
network
=
resnet_deeplab
.
DilatedResNet
(
**
kwargs
)
expected_config
=
dict
(
kwargs
)
self
.
assertEqual
(
network
.
get_config
(),
expected_config
)
# Create another network object from the first object's config.
new_network
=
resnet_deeplab
.
DilatedResNet
.
from_config
(
network
.
get_config
())
# Validate that the config can be forced to JSON.
_
=
new_network
.
to_json
()
# If the serialization was successful, the new config should match the old.
self
.
assertAllEqual
(
network
.
get_config
(),
new_network
.
get_config
())
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
official/vision/modeling/backbones/resnet_test.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for resnet."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.python.distribute
import
combinations
from
tensorflow.python.distribute
import
strategy_combinations
from
official.vision.modeling.backbones
import
resnet
class
ResNetTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
128
,
10
,
1
),
(
128
,
18
,
1
),
(
128
,
34
,
1
),
(
128
,
50
,
4
),
(
128
,
101
,
4
),
(
128
,
152
,
4
),
)
def
test_network_creation
(
self
,
input_size
,
model_id
,
endpoint_filter_scale
):
"""Test creation of ResNet family models."""
resnet_params
=
{
10
:
4915904
,
18
:
11190464
,
34
:
21306048
,
50
:
23561152
,
101
:
42605504
,
152
:
58295232
,
}
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
resnet
.
ResNet
(
model_id
=
model_id
)
self
.
assertEqual
(
network
.
count_params
(),
resnet_params
[
model_id
])
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
endpoints
=
network
(
inputs
)
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
2
,
input_size
/
2
**
2
,
64
*
endpoint_filter_scale
],
endpoints
[
'2'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
3
,
input_size
/
2
**
3
,
128
*
endpoint_filter_scale
],
endpoints
[
'3'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
4
,
input_size
/
2
**
4
,
256
*
endpoint_filter_scale
],
endpoints
[
'4'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
5
,
input_size
/
2
**
5
,
512
*
endpoint_filter_scale
],
endpoints
[
'5'
].
shape
.
as_list
())
@
combinations
.
generate
(
combinations
.
combine
(
strategy
=
[
strategy_combinations
.
cloud_tpu_strategy
,
strategy_combinations
.
one_device_strategy_gpu
,
],
use_sync_bn
=
[
False
,
True
],
))
def
test_sync_bn_multiple_devices
(
self
,
strategy
,
use_sync_bn
):
"""Test for sync bn on TPU and GPU devices."""
inputs
=
np
.
random
.
rand
(
64
,
128
,
128
,
3
)
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
with
strategy
.
scope
():
network
=
resnet
.
ResNet
(
model_id
=
50
,
use_sync_bn
=
use_sync_bn
)
_
=
network
(
inputs
)
@
parameterized
.
parameters
(
(
128
,
34
,
1
,
'v0'
,
None
,
0.0
,
1.0
,
False
,
False
),
(
128
,
34
,
1
,
'v1'
,
0.25
,
0.2
,
1.25
,
True
,
True
),
(
128
,
50
,
4
,
'v0'
,
None
,
0.0
,
1.5
,
False
,
False
),
(
128
,
50
,
4
,
'v1'
,
0.25
,
0.2
,
2.0
,
True
,
True
),
)
def
test_resnet_rs
(
self
,
input_size
,
model_id
,
endpoint_filter_scale
,
stem_type
,
se_ratio
,
init_stochastic_depth_rate
,
depth_multiplier
,
resnetd_shortcut
,
replace_stem_max_pool
):
"""Test creation of ResNet family models."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
resnet
.
ResNet
(
model_id
=
model_id
,
depth_multiplier
=
depth_multiplier
,
stem_type
=
stem_type
,
resnetd_shortcut
=
resnetd_shortcut
,
replace_stem_max_pool
=
replace_stem_max_pool
,
se_ratio
=
se_ratio
,
init_stochastic_depth_rate
=
init_stochastic_depth_rate
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
_
=
network
(
inputs
)
@
parameterized
.
parameters
(
1
,
3
,
4
)
def
test_input_specs
(
self
,
input_dim
):
"""Test different input feature dimensions."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
input_dim
])
network
=
resnet
.
ResNet
(
model_id
=
50
,
input_specs
=
input_specs
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
128
,
128
,
input_dim
),
batch_size
=
1
)
_
=
network
(
inputs
)
def
test_serialize_deserialize
(
self
):
# Create a network object that sets all of its config options.
kwargs
=
dict
(
model_id
=
50
,
depth_multiplier
=
1.0
,
stem_type
=
'v0'
,
se_ratio
=
None
,
resnetd_shortcut
=
False
,
replace_stem_max_pool
=
False
,
init_stochastic_depth_rate
=
0.0
,
scale_stem
=
True
,
use_sync_bn
=
False
,
activation
=
'relu'
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'VarianceScaling'
,
kernel_regularizer
=
None
,
bias_regularizer
=
None
,
bn_trainable
=
True
)
network
=
resnet
.
ResNet
(
**
kwargs
)
expected_config
=
dict
(
kwargs
)
self
.
assertEqual
(
network
.
get_config
(),
expected_config
)
# Create another network object from the first object's config.
new_network
=
resnet
.
ResNet
.
from_config
(
network
.
get_config
())
# Validate that the config can be forced to JSON.
_
=
new_network
.
to_json
()
# If the serialization was successful, the new config should match the old.
self
.
assertAllEqual
(
network
.
get_config
(),
new_network
.
get_config
())
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
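
A sketch of a ResNet-RS-style configuration using the options exercised by `test_resnet_rs` above (not part of this commit; `resnet.py` itself is not in this page's file list, so the argument names are taken from the test call):

# Usage sketch (not in this commit): ResNet-RS style options on ResNet-50.
import tensorflow as tf

from official.vision.modeling.backbones import resnet

network = resnet.ResNet(
    model_id=50,
    depth_multiplier=1.0,
    stem_type='v1',               # three 3x3 stem convs instead of one 7x7.
    resnetd_shortcut=True,
    replace_stem_max_pool=True,
    se_ratio=0.25,
    init_stochastic_depth_rate=0.2)
inputs = tf.keras.Input(shape=(128, 128, 3), batch_size=1)
endpoints = network(inputs)  # dict keyed '2'..'5', strides 4 to 32.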
official/vision/modeling/backbones/revnet.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains definitions of RevNet."""
from
typing
import
Any
,
Callable
,
Dict
,
Optional
# Import libraries
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.modeling
import
tf_utils
from
official.vision.modeling.backbones
import
factory
from
official.vision.modeling.layers
import
nn_blocks
# Specifications for different RevNet variants.
# Each entry specifies block configurations of the particular RevNet variant.
# Each element in the block configuration is in the following format:
# (block_fn, num_filters, block_repeats)
REVNET_SPECS
=
{
38
:
[
(
'residual'
,
32
,
3
),
(
'residual'
,
64
,
3
),
(
'residual'
,
112
,
3
),
],
56
:
[
(
'bottleneck'
,
128
,
2
),
(
'bottleneck'
,
256
,
2
),
(
'bottleneck'
,
512
,
3
),
(
'bottleneck'
,
832
,
2
),
],
104
:
[
(
'bottleneck'
,
128
,
2
),
(
'bottleneck'
,
256
,
2
),
(
'bottleneck'
,
512
,
11
),
(
'bottleneck'
,
832
,
2
),
],
}
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
RevNet
(
tf
.
keras
.
Model
):
"""Creates a Reversible ResNet (RevNet) family model.
This implements:
Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse.
The Reversible Residual Network: Backpropagation Without Storing
Activations.
(https://arxiv.org/pdf/1707.04585.pdf)
"""
def
__init__
(
self
,
model_id
:
int
,
input_specs
:
tf
.
keras
.
layers
.
InputSpec
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
**
kwargs
):
"""Initializes a RevNet model.
Args:
model_id: An `int` of depth/id of ResNet backbone model.
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_model_id
=
model_id
self
.
_input_specs
=
input_specs
self
.
_use_sync_bn
=
use_sync_bn
self
.
_activation
=
activation
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
self
.
_kernel_initializer
=
kernel_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
if
use_sync_bn
:
self
.
_norm
=
tf
.
keras
.
layers
.
experimental
.
SyncBatchNormalization
else
:
self
.
_norm
=
tf
.
keras
.
layers
.
BatchNormalization
axis
=
-
1
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_last'
else
1
# Build RevNet.
inputs
=
tf
.
keras
.
Input
(
shape
=
input_specs
.
shape
[
1
:])
x
=
tf
.
keras
.
layers
.
Conv2D
(
filters
=
REVNET_SPECS
[
model_id
][
0
][
1
],
kernel_size
=
7
,
strides
=
2
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
axis
,
momentum
=
norm_momentum
,
epsilon
=
norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
activation
)(
x
)
x
=
tf
.
keras
.
layers
.
MaxPool2D
(
pool_size
=
3
,
strides
=
2
,
padding
=
'same'
)(
x
)
endpoints
=
{}
for
i
,
spec
in
enumerate
(
REVNET_SPECS
[
model_id
]):
if
spec
[
0
]
==
'residual'
:
inner_block_fn
=
nn_blocks
.
ResidualInner
elif
spec
[
0
]
==
'bottleneck'
:
inner_block_fn
=
nn_blocks
.
BottleneckResidualInner
else
:
raise
ValueError
(
'Block fn `{}` is not supported.'
.
format
(
spec
[
0
]))
if
spec
[
1
]
%
2
!=
0
:
raise
ValueError
(
'Number of output filters must be even to ensure '
'splitting in channel dimension for reversible blocks'
)
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
spec
[
1
],
strides
=
(
1
if
i
==
0
else
2
),
inner_block_fn
=
inner_block_fn
,
block_repeats
=
spec
[
2
],
batch_norm_first
=
(
i
!=
0
),
# Only skip on first block
name
=
'revblock_group_{}'
.
format
(
i
+
2
))
endpoints
[
str
(
i
+
2
)]
=
x
self
.
_output_specs
=
{
l
:
endpoints
[
l
].
get_shape
()
for
l
in
endpoints
}
super
(
RevNet
,
self
).
__init__
(
inputs
=
inputs
,
outputs
=
endpoints
,
**
kwargs
)
def
_block_group
(
self
,
inputs
:
tf
.
Tensor
,
filters
:
int
,
strides
:
int
,
inner_block_fn
:
Callable
[...,
tf
.
keras
.
layers
.
Layer
],
block_repeats
:
int
,
batch_norm_first
:
bool
,
name
:
str
=
'revblock_group'
)
->
tf
.
Tensor
:
"""Creates one reversible block for RevNet model.
Args:
inputs: A `tf.Tensor` of size `[batch, channels, height, width]`.
filters: An `int` number of filters for the first convolution of the
layer.
strides: An `int` stride to use for the first convolution of the layer. If
greater than 1, this block group will downsample the input.
inner_block_fn: Either `nn_blocks.ResidualInner` or
`nn_blocks.BottleneckResidualInner`.
block_repeats: An `int` number of blocks contained in this block group.
batch_norm_first: A `bool` that specifies whether to apply
BatchNormalization and activation layer before feeding into convolution
layers.
name: A `str` name for the block.
Returns:
The output `tf.Tensor` of the block layer.
"""
x
=
inputs
for
i
in
range
(
block_repeats
):
is_first_block
=
i
==
0
# Only first residual layer in block gets downsampled
curr_strides
=
strides
if
is_first_block
else
1
f
=
inner_block_fn
(
filters
=
filters
//
2
,
strides
=
curr_strides
,
batch_norm_first
=
batch_norm_first
and
is_first_block
,
kernel_regularizer
=
self
.
_kernel_regularizer
)
g
=
inner_block_fn
(
filters
=
filters
//
2
,
strides
=
1
,
batch_norm_first
=
batch_norm_first
and
is_first_block
,
kernel_regularizer
=
self
.
_kernel_regularizer
)
x
=
nn_blocks
.
ReversibleLayer
(
f
,
g
)(
x
)
return
tf
.
identity
(
x
,
name
=
name
)
def
get_config
(
self
)
->
Dict
[
str
,
Any
]:
config_dict
=
{
'model_id'
:
self
.
_model_id
,
'activation'
:
self
.
_activation
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
}
return
config_dict
@
classmethod
def
from_config
(
cls
,
config
:
Dict
[
str
,
Any
],
custom_objects
:
Optional
[
Any
]
=
None
)
->
tf
.
keras
.
Model
:
return
cls
(
**
config
)
@
property
def
output_specs
(
self
)
->
Dict
[
int
,
tf
.
TensorShape
]:
"""A dict of {level: TensorShape} pairs for the model output."""
return
self
.
_output_specs
@
factory
.
register_backbone_builder
(
'revnet'
)
def
build_revnet
(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
backbone_config
:
hyperparams
.
Config
,
norm_activation_config
:
hyperparams
.
Config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
# pytype: disable=annotation-type-mismatch # typed-keras
"""Builds RevNet backbone from a config."""
backbone_type
=
backbone_config
.
type
backbone_cfg
=
backbone_config
.
get
()
assert
backbone_type
==
'revnet'
,
(
f
'Inconsistent backbone type '
f
'
{
backbone_type
}
'
)
return
RevNet
(
model_id
=
backbone_cfg
.
model_id
,
input_specs
=
input_specs
,
activation
=
norm_activation_config
.
activation
,
use_sync_bn
=
norm_activation_config
.
use_sync_bn
,
norm_momentum
=
norm_activation_config
.
norm_momentum
,
norm_epsilon
=
norm_activation_config
.
norm_epsilon
,
kernel_regularizer
=
l2_regularizer
)
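
A minimal usage sketch for the new RevNet backbone (not part of this commit; assumes the Model Garden `official` package is importable):

# Usage sketch (not in this commit): build RevNet-56 and inspect endpoints.
import tensorflow as tf

from official.vision.modeling.backbones import revnet

network = revnet.RevNet(model_id=56)
inputs = tf.keras.Input(shape=(128, 128, 3), batch_size=1)
endpoints = network(inputs)
# Levels '2'..'5' at strides 4, 8, 16, 32; channel counts follow
# REVNET_SPECS[56] times the bottleneck expansion (see revnet_test.py below).
for level, feat in sorted(endpoints.items()):
  print(level, feat.shape)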
official/vision/modeling/backbones/revnet_test.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for RevNet."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.modeling.backbones
import
revnet
class
RevNetTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
@
parameterized
.
parameters
(
(
128
,
56
,
4
),
(
128
,
104
,
4
),
)
def
test_network_creation
(
self
,
input_size
,
model_id
,
endpoint_filter_scale
):
"""Test creation of RevNet family models."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
network
=
revnet
.
RevNet
(
model_id
=
model_id
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
input_size
,
input_size
,
3
),
batch_size
=
1
)
endpoints
=
network
(
inputs
)
network
.
summary
()
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
2
,
input_size
/
2
**
2
,
128
*
endpoint_filter_scale
],
endpoints
[
'2'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
3
,
input_size
/
2
**
3
,
256
*
endpoint_filter_scale
],
endpoints
[
'3'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
4
,
input_size
/
2
**
4
,
512
*
endpoint_filter_scale
],
endpoints
[
'4'
].
shape
.
as_list
())
self
.
assertAllEqual
(
[
1
,
input_size
/
2
**
5
,
input_size
/
2
**
5
,
832
*
endpoint_filter_scale
],
endpoints
[
'5'
].
shape
.
as_list
())
@
parameterized
.
parameters
(
1
,
3
,
4
)
def
test_input_specs
(
self
,
input_dim
):
"""Test different input feature dimensions."""
tf
.
keras
.
backend
.
set_image_data_format
(
'channels_last'
)
input_specs
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
input_dim
])
network
=
revnet
.
RevNet
(
model_id
=
56
,
input_specs
=
input_specs
)
inputs
=
tf
.
keras
.
Input
(
shape
=
(
128
,
128
,
input_dim
),
batch_size
=
1
)
_
=
network
(
inputs
)
def
test_serialize_deserialize
(
self
):
# Create a network object that sets all of its config options.
kwargs
=
dict
(
model_id
=
56
,
activation
=
'relu'
,
use_sync_bn
=
False
,
norm_momentum
=
0.99
,
norm_epsilon
=
0.001
,
kernel_initializer
=
'VarianceScaling'
,
kernel_regularizer
=
None
,
)
network
=
revnet
.
RevNet
(
**
kwargs
)
expected_config
=
dict
(
kwargs
)
self
.
assertEqual
(
network
.
get_config
(),
expected_config
)
# Create another network object from the first object's config.
new_network
=
revnet
.
RevNet
.
from_config
(
network
.
get_config
())
# Validate that the config can be forced to JSON.
_
=
new_network
.
to_json
()
# If the serialization was successful, the new config should match the old.
self
.
assertAllEqual
(
network
.
get_config
(),
new_network
.
get_config
())
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
official/vision/modeling/backbones/spinenet.py  (new file, 0 → 100644, at 0225b135)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of SpineNet Networks."""
import
math
from
typing
import
Any
,
List
,
Optional
,
Tuple
# Import libraries
from
absl
import
logging
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.modeling
import
tf_utils
from
official.vision.modeling.backbones
import
factory
from
official.vision.modeling.layers
import
nn_blocks
from
official.vision.modeling.layers
import
nn_layers
from
official.vision.ops
import
spatial_transform_ops
layers
=
tf
.
keras
.
layers
FILTER_SIZE_MAP
=
{
1
:
32
,
2
:
64
,
3
:
128
,
4
:
256
,
5
:
256
,
6
:
256
,
7
:
256
,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS
=
[
(
2
,
'bottleneck'
,
(
0
,
1
),
False
),
(
4
,
'residual'
,
(
0
,
1
),
False
),
(
3
,
'bottleneck'
,
(
2
,
3
),
False
),
(
4
,
'bottleneck'
,
(
2
,
4
),
False
),
(
6
,
'residual'
,
(
3
,
5
),
False
),
(
4
,
'bottleneck'
,
(
3
,
5
),
False
),
(
5
,
'residual'
,
(
6
,
7
),
False
),
(
7
,
'residual'
,
(
6
,
8
),
False
),
(
5
,
'bottleneck'
,
(
8
,
9
),
False
),
(
5
,
'bottleneck'
,
(
8
,
10
),
False
),
(
4
,
'bottleneck'
,
(
5
,
10
),
True
),
(
3
,
'bottleneck'
,
(
4
,
10
),
True
),
(
5
,
'bottleneck'
,
(
7
,
12
),
True
),
(
7
,
'bottleneck'
,
(
5
,
14
),
True
),
(
6
,
'bottleneck'
,
(
12
,
14
),
True
),
(
2
,
'bottleneck'
,
(
2
,
13
),
True
),
]
SCALING_MAP
=
{
'49S'
:
{
'endpoints_num_filters'
:
128
,
'filter_size_scale'
:
0.65
,
'resample_alpha'
:
0.5
,
'block_repeats'
:
1
,
},
'49'
:
{
'endpoints_num_filters'
:
256
,
'filter_size_scale'
:
1.0
,
'resample_alpha'
:
0.5
,
'block_repeats'
:
1
,
},
'96'
:
{
'endpoints_num_filters'
:
256
,
'filter_size_scale'
:
1.0
,
'resample_alpha'
:
0.5
,
'block_repeats'
:
2
,
},
'143'
:
{
'endpoints_num_filters'
:
256
,
'filter_size_scale'
:
1.0
,
'resample_alpha'
:
1.0
,
'block_repeats'
:
3
,
},
# SpineNet-143 with 1.3x filter_size_scale.
'143L'
:
{
'endpoints_num_filters'
:
256
,
'filter_size_scale'
:
1.3
,
'resample_alpha'
:
1.0
,
'block_repeats'
:
3
,
},
'190'
:
{
'endpoints_num_filters'
:
512
,
'filter_size_scale'
:
1.3
,
'resample_alpha'
:
1.0
,
'block_repeats'
:
4
,
},
}
class
BlockSpec
(
object
):
"""A container class that specifies the block configuration for SpineNet."""
def
__init__
(
self
,
level
:
int
,
block_fn
:
str
,
input_offsets
:
Tuple
[
int
,
int
],
is_output
:
bool
):
self
.
level
=
level
self
.
block_fn
=
block_fn
self
.
input_offsets
=
input_offsets
self
.
is_output
=
is_output
def
build_block_specs
(
block_specs
:
Optional
[
List
[
Tuple
[
Any
,
...]]]
=
None
)
->
List
[
BlockSpec
]:
"""Builds the list of BlockSpec objects for SpineNet."""
if
not
block_specs
:
block_specs
=
SPINENET_BLOCK_SPECS
logging
.
info
(
'Building SpineNet block specs: %s'
,
block_specs
)
return
[
BlockSpec
(
*
b
)
for
b
in
block_specs
]
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
SpineNet
(
tf
.
keras
.
Model
):
"""Creates a SpineNet family model.
This implements:
Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan,
Yin Cui, Quoc V. Le, Xiaodan Song.
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization.
(https://arxiv.org/abs/1912.05027)
"""
def
__init__
(
self
,
input_specs
:
tf
.
keras
.
layers
.
InputSpec
=
tf
.
keras
.
layers
.
InputSpec
(
shape
=
[
None
,
None
,
None
,
3
]),
min_level
:
int
=
3
,
max_level
:
int
=
7
,
block_specs
:
List
[
BlockSpec
]
=
build_block_specs
(),
endpoints_num_filters
:
int
=
256
,
resample_alpha
:
float
=
0.5
,
block_repeats
:
int
=
1
,
filter_size_scale
:
float
=
1.0
,
init_stochastic_depth_rate
:
float
=
0.0
,
kernel_initializer
:
str
=
'VarianceScaling'
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
**
kwargs
):
"""Initializes a SpineNet model.
Args:
input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
min_level: An `int` of min level for output mutiscale features.
max_level: An `int` of max level for output mutiscale features.
block_specs: A list of block specifications for the SpineNet model
discovered by NAS.
endpoints_num_filters: An `int` of feature dimension for the output
endpoints.
resample_alpha: A `float` of resampling factor in cross-scale connections.
block_repeats: An `int` of number of blocks contained in the layer.
filter_size_scale: A `float` of multiplier for the filters (number of
channels) for all convolution ops. The value must be greater than zero.
Typical usage will be to set this value in (0, 1) to reduce the number
of parameters or computation cost of the model.
init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
kernel_initializer: A str for kernel initializer of convolutional layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default to None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
Default to None.
activation: A `str` name of the activation function.
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A small `float` added to variance to avoid dividing by zero.
**kwargs: Additional keyword arguments to be passed.
"""
self
.
_input_specs
=
input_specs
self
.
_min_level
=
min_level
self
.
_max_level
=
max_level
self
.
_block_specs
=
block_specs
self
.
_endpoints_num_filters
=
endpoints_num_filters
self
.
_resample_alpha
=
resample_alpha
self
.
_block_repeats
=
block_repeats
self
.
_filter_size_scale
=
filter_size_scale
self
.
_init_stochastic_depth_rate
=
init_stochastic_depth_rate
self
.
_kernel_initializer
=
kernel_initializer
self
.
_kernel_regularizer
=
kernel_regularizer
self
.
_bias_regularizer
=
bias_regularizer
self
.
_activation
=
activation
self
.
_use_sync_bn
=
use_sync_bn
self
.
_norm_momentum
=
norm_momentum
self
.
_norm_epsilon
=
norm_epsilon
if
activation
==
'relu'
:
self
.
_activation_fn
=
tf
.
nn
.
relu
elif
activation
==
'swish'
:
self
.
_activation_fn
=
tf
.
nn
.
swish
else
:
raise
ValueError
(
'Activation {} not implemented.'
.
format
(
activation
))
self
.
_init_block_fn
=
'bottleneck'
self
.
_num_init_blocks
=
2
if
use_sync_bn
:
self
.
_norm
=
layers
.
experimental
.
SyncBatchNormalization
else
:
self
.
_norm
=
layers
.
BatchNormalization
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_last'
:
self
.
_bn_axis
=
-
1
else
:
self
.
_bn_axis
=
1
# Build SpineNet.
inputs
=
tf
.
keras
.
Input
(
shape
=
input_specs
.
shape
[
1
:])
net
=
self
.
_build_stem
(
inputs
=
inputs
)
input_width
=
input_specs
.
shape
[
2
]
if
input_width
is
None
:
max_stride
=
max
(
map
(
lambda
b
:
b
.
level
,
block_specs
))
input_width
=
2
**
max_stride
net
=
self
.
_build_scale_permuted_network
(
net
=
net
,
input_width
=
input_width
)
endpoints
=
self
.
_build_endpoints
(
net
=
net
)
self
.
_output_specs
=
{
l
:
endpoints
[
l
].
get_shape
()
for
l
in
endpoints
}
super
(
SpineNet
,
self
).
__init__
(
inputs
=
inputs
,
outputs
=
endpoints
)
def
_block_group
(
self
,
inputs
:
tf
.
Tensor
,
filters
:
int
,
strides
:
int
,
block_fn_cand
:
str
,
block_repeats
:
int
=
1
,
stochastic_depth_drop_rate
:
Optional
[
float
]
=
None
,
name
:
str
=
'block_group'
):
"""Creates one group of blocks for the SpineNet model."""
block_fn_candidates
=
{
'bottleneck'
:
nn_blocks
.
BottleneckBlock
,
'residual'
:
nn_blocks
.
ResidualBlock
,
}
block_fn
=
block_fn_candidates
[
block_fn_cand
]
_
,
_
,
_
,
num_filters
=
inputs
.
get_shape
().
as_list
()
if
block_fn_cand
==
'bottleneck'
:
use_projection
=
not
(
num_filters
==
(
filters
*
4
)
and
strides
==
1
)
else
:
use_projection
=
not
(
num_filters
==
filters
and
strides
==
1
)
x
=
block_fn
(
filters
=
filters
,
strides
=
strides
,
use_projection
=
use_projection
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
,
activation
=
self
.
_activation
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
inputs
)
for
_
in
range
(
1
,
block_repeats
):
x
=
block_fn
(
filters
=
filters
,
strides
=
1
,
use_projection
=
False
,
stochastic_depth_drop_rate
=
stochastic_depth_drop_rate
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
,
activation
=
self
.
_activation
,
use_sync_bn
=
self
.
_use_sync_bn
,
norm_momentum
=
self
.
_norm_momentum
,
norm_epsilon
=
self
.
_norm_epsilon
)(
x
)
return
tf
.
identity
(
x
,
name
=
name
)
def
_build_stem
(
self
,
inputs
):
"""Builds SpineNet stem."""
x
=
layers
.
Conv2D
(
filters
=
64
,
kernel_size
=
7
,
strides
=
2
,
use_bias
=
False
,
padding
=
'same'
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
x
)
x
=
layers
.
MaxPool2D
(
pool_size
=
3
,
strides
=
2
,
padding
=
'same'
)(
x
)
net
=
[]
# Build the initial level 2 blocks.
for
i
in
range
(
self
.
_num_init_blocks
):
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
int
(
FILTER_SIZE_MAP
[
2
]
*
self
.
_filter_size_scale
),
strides
=
1
,
block_fn_cand
=
self
.
_init_block_fn
,
block_repeats
=
self
.
_block_repeats
,
name
=
'stem_block_{}'
.
format
(
i
+
1
))
net
.
append
(
x
)
return
net
def
_build_scale_permuted_network
(
self
,
net
,
input_width
,
weighted_fusion
=
False
):
"""Builds scale-permuted network."""
net_sizes
=
[
int
(
math
.
ceil
(
input_width
/
2
**
2
))]
*
len
(
net
)
net_block_fns
=
[
self
.
_init_block_fn
]
*
len
(
net
)
num_outgoing_connections
=
[
0
]
*
len
(
net
)
endpoints
=
{}
for
i
,
block_spec
in
enumerate
(
self
.
_block_specs
):
# Find out specs for the target block.
target_width
=
int
(
math
.
ceil
(
input_width
/
2
**
block_spec
.
level
))
target_num_filters
=
int
(
FILTER_SIZE_MAP
[
block_spec
.
level
]
*
self
.
_filter_size_scale
)
target_block_fn
=
block_spec
.
block_fn
# Resample then merge input0 and input1.
parents
=
[]
input0
=
block_spec
.
input_offsets
[
0
]
input1
=
block_spec
.
input_offsets
[
1
]
x0
=
self
.
_resample_with_alpha
(
inputs
=
net
[
input0
],
input_width
=
net_sizes
[
input0
],
input_block_fn
=
net_block_fns
[
input0
],
target_width
=
target_width
,
target_num_filters
=
target_num_filters
,
target_block_fn
=
target_block_fn
,
alpha
=
self
.
_resample_alpha
)
parents
.
append
(
x0
)
num_outgoing_connections
[
input0
]
+=
1
x1
=
self
.
_resample_with_alpha
(
inputs
=
net
[
input1
],
input_width
=
net_sizes
[
input1
],
input_block_fn
=
net_block_fns
[
input1
],
target_width
=
target_width
,
target_num_filters
=
target_num_filters
,
target_block_fn
=
target_block_fn
,
alpha
=
self
.
_resample_alpha
)
parents
.
append
(
x1
)
num_outgoing_connections
[
input1
]
+=
1
# Merge 0 outdegree blocks to the output block.
if
block_spec
.
is_output
:
for
j
,
(
j_feat
,
j_connections
)
in
enumerate
(
zip
(
net
,
num_outgoing_connections
)):
if
j_connections
==
0
and
(
j_feat
.
shape
[
2
]
==
target_width
and
j_feat
.
shape
[
3
]
==
x0
.
shape
[
3
]):
parents
.
append
(
j_feat
)
num_outgoing_connections
[
j
]
+=
1
# pylint: disable=g-direct-tensorflow-import
if
weighted_fusion
:
dtype
=
parents
[
0
].
dtype
parent_weights
=
[
tf
.
nn
.
relu
(
tf
.
cast
(
tf
.
Variable
(
1.0
,
name
=
'block{}_fusion{}'
.
format
(
i
,
j
)),
dtype
=
dtype
))
for
j
in
range
(
len
(
parents
))]
weights_sum
=
tf
.
add_n
(
parent_weights
)
parents
=
[
parents
[
i
]
*
parent_weights
[
i
]
/
(
weights_sum
+
0.0001
)
for
i
in
range
(
len
(
parents
))
]
# Fuse all parent nodes then build a new block.
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
tf
.
add_n
(
parents
))
x
=
self
.
_block_group
(
inputs
=
x
,
filters
=
target_num_filters
,
strides
=
1
,
block_fn_cand
=
target_block_fn
,
block_repeats
=
self
.
_block_repeats
,
stochastic_depth_drop_rate
=
nn_layers
.
get_stochastic_depth_rate
(
self
.
_init_stochastic_depth_rate
,
i
+
1
,
len
(
self
.
_block_specs
)),
name
=
'scale_permuted_block_{}'
.
format
(
i
+
1
))
net
.
append
(
x
)
net_sizes
.
append
(
target_width
)
net_block_fns
.
append
(
target_block_fn
)
num_outgoing_connections
.
append
(
0
)
# Save output feats.
if
block_spec
.
is_output
:
if
block_spec
.
level
in
endpoints
:
raise
ValueError
(
'Duplicate feats found for output level {}.'
.
format
(
block_spec
.
level
))
if
(
block_spec
.
level
<
self
.
_min_level
or
block_spec
.
level
>
self
.
_max_level
):
logging
.
warning
(
'SpineNet output level out of range [min_level, max_level] = '
'[%s, %s] will not be used for further processing.'
,
self
.
_min_level
,
self
.
_max_level
)
endpoints
[
str
(
block_spec
.
level
)]
=
x
return
endpoints
def
_build_endpoints
(
self
,
net
):
"""Matches filter size for endpoints before sharing conv layers."""
endpoints
=
{}
for
level
in
range
(
self
.
_min_level
,
self
.
_max_level
+
1
):
x
=
layers
.
Conv2D
(
filters
=
self
.
_endpoints_num_filters
,
kernel_size
=
1
,
strides
=
1
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
net
[
str
(
level
)])
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
x
)
endpoints
[
str
(
level
)]
=
x
return
endpoints
def
_resample_with_alpha
(
self
,
inputs
,
input_width
,
input_block_fn
,
target_width
,
target_num_filters
,
target_block_fn
,
alpha
=
0.5
):
"""Matches resolution and feature dimension."""
_
,
_
,
_
,
input_num_filters
=
inputs
.
get_shape
().
as_list
()
if
input_block_fn
==
'bottleneck'
:
input_num_filters
/=
4
new_num_filters
=
int
(
input_num_filters
*
alpha
)
x
=
layers
.
Conv2D
(
filters
=
new_num_filters
,
kernel_size
=
1
,
strides
=
1
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
inputs
)
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
x
)
# Spatial resampling.
if
input_width
>
target_width
:
x
=
layers
.
Conv2D
(
filters
=
new_num_filters
,
kernel_size
=
3
,
strides
=
2
,
padding
=
'SAME'
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
x
)
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
x
=
tf_utils
.
get_activation
(
self
.
_activation_fn
)(
x
)
input_width
/=
2
while
input_width
>
target_width
:
x
=
layers
.
MaxPool2D
(
pool_size
=
3
,
strides
=
2
,
padding
=
'SAME'
)(
x
)
input_width
/=
2
elif
input_width
<
target_width
:
scale
=
target_width
//
input_width
x
=
spatial_transform_ops
.
nearest_upsampling
(
x
,
scale
=
scale
)
# Last 1x1 conv to match filter size.
if
target_block_fn
==
'bottleneck'
:
target_num_filters
*=
4
x
=
layers
.
Conv2D
(
filters
=
target_num_filters
,
kernel_size
=
1
,
strides
=
1
,
use_bias
=
False
,
kernel_initializer
=
self
.
_kernel_initializer
,
kernel_regularizer
=
self
.
_kernel_regularizer
,
bias_regularizer
=
self
.
_bias_regularizer
)(
x
)
x
=
self
.
_norm
(
axis
=
self
.
_bn_axis
,
momentum
=
self
.
_norm_momentum
,
epsilon
=
self
.
_norm_epsilon
)(
x
)
return
x
def
get_config
(
self
):
config_dict
=
{
'min_level'
:
self
.
_min_level
,
'max_level'
:
self
.
_max_level
,
'endpoints_num_filters'
:
self
.
_endpoints_num_filters
,
'resample_alpha'
:
self
.
_resample_alpha
,
'block_repeats'
:
self
.
_block_repeats
,
'filter_size_scale'
:
self
.
_filter_size_scale
,
'init_stochastic_depth_rate'
:
self
.
_init_stochastic_depth_rate
,
'kernel_initializer'
:
self
.
_kernel_initializer
,
'kernel_regularizer'
:
self
.
_kernel_regularizer
,
'bias_regularizer'
:
self
.
_bias_regularizer
,
'activation'
:
self
.
_activation
,
'use_sync_bn'
:
self
.
_use_sync_bn
,
'norm_momentum'
:
self
.
_norm_momentum
,
'norm_epsilon'
:
self
.
_norm_epsilon
}
return
config_dict
@
classmethod
def
from_config
(
cls
,
config
,
custom_objects
=
None
):
return
cls
(
**
config
)
@
property
def
output_specs
(
self
):
"""A dict of {level: TensorShape} pairs for the model output."""
return
self
.
_output_specs
@
factory
.
register_backbone_builder
(
'spinenet'
)
def
build_spinenet
(
input_specs
:
tf
.
keras
.
layers
.
InputSpec
,
backbone_config
:
hyperparams
.
Config
,
norm_activation_config
:
hyperparams
.
Config
,
l2_regularizer
:
tf
.
keras
.
regularizers
.
Regularizer
=
None
)
->
tf
.
keras
.
Model
:
"""Builds SpineNet backbone from a config."""
backbone_type
=
backbone_config
.
type
backbone_cfg
=
backbone_config
.
get
()
assert
backbone_type
==
'spinenet'
,
(
f
'Inconsistent backbone type '
f
'
{
backbone_type
}
'
)
model_id
=
backbone_cfg
.
model_id
if
model_id
not
in
SCALING_MAP
:
raise
ValueError
(
'SpineNet-{} is not a valid architecture.'
.
format
(
model_id
))
scaling_params
=
SCALING_MAP
[
model_id
]
return
SpineNet
(
input_specs
=
input_specs
,
min_level
=
backbone_cfg
.
min_level
,
max_level
=
backbone_cfg
.
max_level
,
endpoints_num_filters
=
scaling_params
[
'endpoints_num_filters'
],
resample_alpha
=
scaling_params
[
'resample_alpha'
],
block_repeats
=
scaling_params
[
'block_repeats'
],
filter_size_scale
=
scaling_params
[
'filter_size_scale'
],
init_stochastic_depth_rate
=
backbone_cfg
.
stochastic_depth_drop_rate
,
kernel_regularizer
=
l2_regularizer
,
activation
=
norm_activation_config
.
activation
,
use_sync_bn
=
norm_activation_config
.
use_sync_bn
,
norm_momentum
=
norm_activation_config
.
norm_momentum
,
norm_epsilon
=
norm_activation_config
.
norm_epsilon
)
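
A minimal usage sketch showing how the `SCALING_MAP` entries feed the `SpineNet` constructor (not part of this commit; `build_spinenet` above does the same wiring from a `hyperparams.Config`):

# Usage sketch (not in this commit): SpineNet-49 with its published scaling
# parameters, producing multiscale endpoints for levels 3..7.
import tensorflow as tf

from official.vision.modeling.backbones import spinenet

params = spinenet.SCALING_MAP['49']
network = spinenet.SpineNet(
    input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
    min_level=3,
    max_level=7,
    endpoints_num_filters=params['endpoints_num_filters'],
    resample_alpha=params['resample_alpha'],
    block_repeats=params['block_repeats'],
    filter_size_scale=params['filter_size_scale'])
inputs = tf.keras.Input(shape=(640, 640, 3), batch_size=1)
endpoints = network(inputs)
# One feature map per level '3'..'7', all with endpoints_num_filters channels.
for level in sorted(endpoints):
  print(level, endpoints[level].shape)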
official/vision/modeling/backbones/spinenet_mobile.py  (new file, 0 → 100644, at 0225b135; listing truncated on this page)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains definitions of Mobile SpineNet Networks."""
import math
from typing import Any, List, Optional, Tuple

# Import libraries
from absl import logging
import tensorflow as tf

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.backbones import factory
from official.vision.modeling.layers import nn_blocks
from official.vision.modeling.layers import nn_layers
from official.vision.ops import spatial_transform_ops

layers = tf.keras.layers

FILTER_SIZE_MAP = {
    0: 8,
    1: 16,
    2: 24,
    3: 40,
    4: 80,
    5: 112,
    6: 112,
    7: 112,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
    (2, 'mbconv', (0, 1), False),
    (2, 'mbconv', (1, 2), False),
    (4, 'mbconv', (1, 2), False),
    (3, 'mbconv', (3, 4), False),
    (4, 'mbconv', (3, 5), False),
    (6, 'mbconv', (4, 6), False),
    (4, 'mbconv', (4, 6), False),
    (5, 'mbconv', (7, 8), False),
    (7, 'mbconv', (7, 9), False),
    (5, 'mbconv', (9, 10), False),
    (5, 'mbconv', (9, 11), False),
    (4, 'mbconv', (6, 11), True),
    (3, 'mbconv', (5, 11), True),
    (5, 'mbconv', (8, 13), True),
    (7, 'mbconv', (6, 15), True),
    (6, 'mbconv', (13, 15), True),
]

SCALING_MAP = {
    '49': {
        'endpoints_num_filters': 48,
        'filter_size_scale': 1.0,
        'block_repeats': 1,
    },
    '49S': {
        'endpoints_num_filters': 40,
        'filter_size_scale': 0.65,
        'block_repeats': 1,
    },
    '49XS': {
        'endpoints_num_filters': 24,
        'filter_size_scale': 0.6,
        'block_repeats': 1,
    },
}


class BlockSpec(object):
  """A container class that specifies the block configuration for SpineNet."""

  def __init__(self, level: int, block_fn: str,
               input_offsets: Tuple[int, int], is_output: bool):
    self.level = level
    self.block_fn = block_fn
    self.input_offsets = input_offsets
    self.is_output = is_output


def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Builds the list of BlockSpec objects for SpineNet."""
  if not block_specs:
    block_specs = SPINENET_BLOCK_SPECS
  logging.info('Building SpineNet block specs: %s', block_specs)
  return [BlockSpec(*b) for b in block_specs]
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNetMobile(tf.keras.Model):
  """Creates a Mobile SpineNet family model.

  This implements:
  [1] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Golnaz Ghiasi, Mingxing Tan,
      Yin Cui, Quoc V. Le, Xiaodan Song.
      SpineNet: Learning Scale-Permuted Backbone for Recognition and
      Localization.
      (https://arxiv.org/abs/1912.05027).
  [2] Xianzhi Du, Tsung-Yi Lin, Pengchong Jin, Yin Cui, Mingxing Tan,
      Quoc Le, Xiaodan Song.
      Efficient Scale-Permuted Backbone with Learned Resource Distribution.
      (https://arxiv.org/abs/2010.11426).
  """

  def __init__(
      self,
      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
          shape=[None, None, None, 3]),
      min_level: int = 3,
      max_level: int = 7,
      block_specs: List[BlockSpec] = build_block_specs(),
      endpoints_num_filters: int = 256,
      se_ratio: float = 0.2,
      block_repeats: int = 1,
      filter_size_scale: float = 1.0,
      expand_ratio: int = 6,
      init_stochastic_depth_rate=0.0,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      use_keras_upsampling_2d: bool = False,
      **kwargs):
    """Initializes a Mobile SpineNet model.

    Args:
      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
      min_level: An `int` of min level for output multiscale features.
      max_level: An `int` of max level for output multiscale features.
      block_specs: The block specifications for the SpineNet model discovered
        by NAS.
      endpoints_num_filters: An `int` of feature dimension for the output
        endpoints.
      se_ratio: A `float` of Squeeze-and-Excitation ratio.
      block_repeats: An `int` of number of blocks contained in the layer.
      filter_size_scale: A `float` of multiplier for the filters (number of
        channels) for all convolution ops. The value must be greater than
        zero. Typical usage will be to set this value in (0, 1) to reduce the
        number of parameters or computation cost of the model.
      expand_ratio: An `int` of expansion ratio for inverted bottleneck
        blocks.
      init_stochastic_depth_rate: A `float` of initial stochastic depth rate.
      kernel_initializer: A `str` of kernel initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Defaults to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Defaults to None.
      activation: A `str` name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A small `float` added to variance to avoid dividing by
        zero.
      use_keras_upsampling_2d: If True, use the Keras UpSampling2D layer.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._input_specs = input_specs
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._endpoints_num_filters = endpoints_num_filters
    self._se_ratio = se_ratio
    self._block_repeats = block_repeats
    self._filter_size_scale = filter_size_scale
    self._expand_ratio = expand_ratio
    self._init_stochastic_depth_rate = init_stochastic_depth_rate
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._activation = activation
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._use_keras_upsampling_2d = use_keras_upsampling_2d
    self._num_init_blocks = 2

    if use_sync_bn:
      self._norm = layers.experimental.SyncBatchNormalization
    else:
      self._norm = layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1

    # Build SpineNet.
    inputs = tf.keras.Input(shape=input_specs.shape[1:])

    net = self._build_stem(inputs=inputs)
    input_width = input_specs.shape[2]
    if input_width is None:
      max_stride = max(map(lambda b: b.level, block_specs))
      input_width = 2 ** max_stride
    net = self._build_scale_permuted_network(net=net, input_width=input_width)
    endpoints = self._build_endpoints(net=net)

    self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

    super().__init__(inputs=inputs, outputs=endpoints)
  def _block_group(self,
                   inputs: tf.Tensor,
                   in_filters: int,
                   out_filters: int,
                   strides: int,
                   expand_ratio: int = 6,
                   block_repeats: int = 1,
                   se_ratio: float = 0.2,
                   stochastic_depth_drop_rate: Optional[float] = None,
                   name: str = 'block_group'):
    """Creates one group of blocks for the SpineNet model."""
    x = nn_blocks.InvertedBottleneckBlock(
        in_filters=in_filters,
        out_filters=out_filters,
        strides=strides,
        se_ratio=se_ratio,
        expand_ratio=expand_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activation=self._activation,
        use_sync_bn=self._use_sync_bn,
        norm_momentum=self._norm_momentum,
        norm_epsilon=self._norm_epsilon)(inputs)
    for _ in range(1, block_repeats):
      # Chain each repeated block on the previous block's output.
      x = nn_blocks.InvertedBottleneckBlock(
          in_filters=in_filters,
          out_filters=out_filters,
          strides=1,
          se_ratio=se_ratio,
          expand_ratio=expand_ratio,
          stochastic_depth_drop_rate=stochastic_depth_drop_rate,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._activation,
          use_sync_bn=self._use_sync_bn,
          norm_momentum=self._norm_momentum,
          norm_epsilon=self._norm_epsilon)(x)

    return tf.keras.layers.Activation('linear', name=name)(x)
  def _build_stem(self, inputs):
    """Builds SpineNet stem."""
    x = layers.Conv2D(
        filters=int(FILTER_SIZE_MAP[0] * self._filter_size_scale),
        kernel_size=3,
        strides=2,
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(inputs)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(x)
    x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)

    net = []
    stem_strides = [1, 2]
    # Build the initial level 2 blocks.
    for i in range(self._num_init_blocks):
      x = self._block_group(
          inputs=x,
          in_filters=int(FILTER_SIZE_MAP[i] * self._filter_size_scale),
          out_filters=int(FILTER_SIZE_MAP[i + 1] * self._filter_size_scale),
          expand_ratio=self._expand_ratio,
          strides=stem_strides[i],
          se_ratio=self._se_ratio,
          block_repeats=self._block_repeats,
          name='stem_block_{}'.format(i + 1))
      net.append(x)
    return net

  def _build_scale_permuted_network(self, net, input_width,
                                    weighted_fusion=False):
    """Builds scale-permuted network."""
    net_sizes = [
        int(math.ceil(input_width / 2)),
        int(math.ceil(input_width / 2 ** 2))
    ]
    num_outgoing_connections = [0] * len(net)

    endpoints = {}
    for i, block_spec in enumerate(self._block_specs):
      # Update block level if it is larger than max_level to avoid building
      # blocks smaller than requested.
      block_spec.level = min(block_spec.level, self._max_level)
      # Find out specs for the target block.
      target_width = int(math.ceil(input_width / 2 ** block_spec.level))
      target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
                               self._filter_size_scale)

      # Resample then merge input0 and input1.
      parents = []
      input0 = block_spec.input_offsets[0]
      input1 = block_spec.input_offsets[1]

      x0 = self._resample_with_sepconv(
          inputs=net[input0],
          input_width=net_sizes[input0],
          target_width=target_width,
          target_num_filters=target_num_filters)
      parents.append(x0)
      num_outgoing_connections[input0] += 1

      x1 = self._resample_with_sepconv(
          inputs=net[input1],
          input_width=net_sizes[input1],
          target_width=target_width,
          target_num_filters=target_num_filters)
      parents.append(x1)
      num_outgoing_connections[input1] += 1

      # Merge 0 outdegree blocks to the output block.
      if block_spec.is_output:
        for j, (j_feat, j_connections) in enumerate(
            zip(net, num_outgoing_connections)):
          if j_connections == 0 and (j_feat.shape[2] == target_width and
                                     j_feat.shape[3] == x0.shape[3]):
            parents.append(j_feat)
            num_outgoing_connections[j] += 1

      # pylint: disable=g-direct-tensorflow-import
      if weighted_fusion:
        dtype = parents[0].dtype
        parent_weights = [
            tf.nn.relu(
                tf.cast(
                    tf.Variable(1.0, name='block{}_fusion{}'.format(i, j)),
                    dtype=dtype)) for j in range(len(parents))
        ]
        weights_sum = layers.Add()(parent_weights)
        parents = [
            parents[i] * parent_weights[i] / (weights_sum + 0.0001)
            for i in range(len(parents))
        ]

      # Fuse all parent nodes then build a new block.
      x = tf_utils.get_activation(
          self._activation, use_keras_layer=True)(layers.Add()(parents))
      x = self._block_group(
          inputs=x,
          in_filters=target_num_filters,
          out_filters=target_num_filters,
          strides=1,
          se_ratio=self._se_ratio,
          expand_ratio=self._expand_ratio,
          block_repeats=self._block_repeats,
          stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
              self._init_stochastic_depth_rate, i + 1,
              len(self._block_specs)),
          name='scale_permuted_block_{}'.format(i + 1))

      net.append(x)
      net_sizes.append(target_width)
      num_outgoing_connections.append(0)

      # Save output feats.
      if block_spec.is_output:
        if block_spec.level in endpoints:
          raise ValueError(
              'Duplicate feats found for output level {}.'.format(
                  block_spec.level))
        if (block_spec.level < self._min_level or
            block_spec.level > self._max_level):
          logging.warning(
              'SpineNet output level out of range '
              '[min_level, max_level] = [%s, %s]; it will not be used for '
              'further processing.', self._min_level, self._max_level)
        endpoints[str(block_spec.level)] = x

    return endpoints
  def _build_endpoints(self, net):
    """Matches filter size for endpoints before sharing conv layers."""
    endpoints = {}
    for level in range(self._min_level, self._max_level + 1):
      x = layers.Conv2D(
          filters=self._endpoints_num_filters,
          kernel_size=1,
          strides=1,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)(net[str(level)])
      x = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)(x)
      x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
      endpoints[str(level)] = x
    return endpoints

  def _resample_with_sepconv(self, inputs, input_width, target_width,
                             target_num_filters):
    """Matches resolution and feature dimension."""
    x = inputs
    # Spatial resampling.
    if input_width > target_width:
      while input_width > target_width:
        x = layers.DepthwiseConv2D(
            kernel_size=3,
            strides=2,
            padding='SAME',
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer)(x)
        x = self._norm(
            axis=self._bn_axis,
            momentum=self._norm_momentum,
            epsilon=self._norm_epsilon)(x)
        x = tf_utils.get_activation(
            self._activation, use_keras_layer=True)(x)
        input_width /= 2
    elif input_width < target_width:
      scale = target_width // input_width
      x = spatial_transform_ops.nearest_upsampling(
          x, scale=scale, use_keras_layer=self._use_keras_upsampling_2d)

    # Last 1x1 conv to match filter size.
    x = layers.Conv2D(
        filters=target_num_filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)(x)
    x = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)(x)

    return x

  def get_config(self):
    config_dict = {
        'min_level': self._min_level,
        'max_level': self._max_level,
        'endpoints_num_filters': self._endpoints_num_filters,
        'se_ratio': self._se_ratio,
        'expand_ratio': self._expand_ratio,
        'block_repeats': self._block_repeats,
        'filter_size_scale': self._filter_size_scale,
        'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'use_keras_upsampling_2d': self._use_keras_upsampling_2d,
    }
    return config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self):
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs


@factory.register_backbone_builder('spinenet_mobile')
def build_spinenet_mobile(
    input_specs: tf.keras.layers.InputSpec,
    backbone_config: hyperparams.Config,
    norm_activation_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:
  """Builds Mobile SpineNet backbone from a config."""
  backbone_type = backbone_config.type
  backbone_cfg = backbone_config.get()
  assert backbone_type == 'spinenet_mobile', (f'Inconsistent backbone type '
                                              f'{backbone_type}')

  model_id = backbone_cfg.model_id
  if model_id not in SCALING_MAP:
    raise ValueError(
        'Mobile SpineNet-{} is not a valid architecture.'.format(model_id))
  scaling_params = SCALING_MAP[model_id]

  return SpineNetMobile(
      input_specs=input_specs,
      min_level=backbone_cfg.min_level,
      max_level=backbone_cfg.max_level,
      endpoints_num_filters=scaling_params['endpoints_num_filters'],
      block_repeats=scaling_params['block_repeats'],
      filter_size_scale=scaling_params['filter_size_scale'],
      se_ratio=backbone_cfg.se_ratio,
      expand_ratio=backbone_cfg.expand_ratio,
      init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
      kernel_regularizer=l2_regularizer,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      use_keras_upsampling_2d=backbone_cfg.use_keras_upsampling_2d)
official/vision/modeling/backbones/spinenet_mobile_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for SpineNet."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.modeling.backbones
import
spinenet_mobile
class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (128, 0.6, 1, 0.0, 24),
      (128, 0.65, 1, 0.2, 40),
      (256, 1.0, 1, 0.2, 48),
  )
  def test_network_creation(self, input_size, filter_size_scale,
                            block_repeats, se_ratio, endpoints_num_filters):
    """Test creation of Mobile SpineNet models."""
    min_level = 3
    max_level = 7

    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size, input_size, 3])
    model = spinenet_mobile.SpineNetMobile(
        input_specs=input_specs,
        min_level=min_level,
        max_level=max_level,
        endpoints_num_filters=endpoints_num_filters,
        se_ratio=se_ratio,
        block_repeats=block_repeats,
        filter_size_scale=filter_size_scale,
        init_stochastic_depth_rate=0.2,
    )

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = model(inputs)

    for l in range(min_level, max_level + 1):
      self.assertIn(str(l), endpoints.keys())
      self.assertAllEqual(
          [1, input_size / 2 ** l, input_size / 2 ** l,
           endpoints_num_filters],
          endpoints[str(l)].shape.as_list())
  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        min_level=3,
        max_level=7,
        endpoints_num_filters=256,
        se_ratio=0.2,
        expand_ratio=6,
        block_repeats=1,
        filter_size_scale=1.0,
        init_stochastic_depth_rate=0.2,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
        use_keras_upsampling_2d=False,
    )
    network = spinenet_mobile.SpineNetMobile(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = spinenet_mobile.SpineNetMobile.from_config(
        network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/backbones/spinenet_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for SpineNet."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import spinenet


class SpineNetTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (128, 0.65, 1, 0.5, 128, 4, 6),
      (256, 1.0, 1, 0.5, 256, 3, 6),
      (384, 1.0, 2, 0.5, 256, 4, 7),
      (512, 1.0, 3, 1.0, 256, 3, 7),
      (640, 1.3, 4, 1.0, 384, 3, 7),
  )
  def test_network_creation(self, input_size, filter_size_scale,
                            block_repeats, resample_alpha,
                            endpoints_num_filters, min_level, max_level):
    """Test creation of SpineNet models."""
    tf.keras.backend.set_image_data_format('channels_last')

    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size, input_size, 3])
    model = spinenet.SpineNet(
        input_specs=input_specs,
        min_level=min_level,
        max_level=max_level,
        endpoints_num_filters=endpoints_num_filters,
        resample_alpha=resample_alpha,
        block_repeats=block_repeats,
        filter_size_scale=filter_size_scale,
        init_stochastic_depth_rate=0.2,
    )

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    endpoints = model(inputs)

    for l in range(min_level, max_level + 1):
      self.assertIn(str(l), endpoints.keys())
      self.assertAllEqual(
          [1, input_size / 2 ** l, input_size / 2 ** l,
           endpoints_num_filters],
          endpoints[str(l)].shape.as_list())

  @parameterized.parameters(
      ((128, 128), (128, 128)),
      ((128, 128), (256, 256)),
      ((640, 640), (896, 1664)),
  )
  def test_load_from_different_input_specs(self, input_size_1, input_size_2):
    """Test loading checkpoints with different input size."""

    def build_spinenet(input_size):
      tf.keras.backend.set_image_data_format('channels_last')
      input_specs = tf.keras.layers.InputSpec(
          shape=[None, input_size[0], input_size[1], 3])
      model = spinenet.SpineNet(
          input_specs=input_specs,
          min_level=3,
          max_level=7,
          endpoints_num_filters=384,
          resample_alpha=1.0,
          block_repeats=2,
          filter_size_scale=0.5)
      return model

    model_1 = build_spinenet(input_size_1)
    model_2 = build_spinenet(input_size_2)

    ckpt_1 = tf.train.Checkpoint(backbone=model_1)
    ckpt_2 = tf.train.Checkpoint(backbone=model_2)

    ckpt_path = self.get_temp_dir() + '/ckpt'
    ckpt_1.write(ckpt_path)
    ckpt_2.restore(ckpt_path).expect_partial()

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        min_level=3,
        max_level=7,
        endpoints_num_filters=256,
        resample_alpha=0.5,
        block_repeats=1,
        filter_size_scale=1.0,
        init_stochastic_depth_rate=0.2,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    network = spinenet.SpineNet(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = spinenet.SpineNet.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/classification_model.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build classification models."""
from typing import Any, Mapping, Optional

# Import libraries
import tensorflow as tf

layers = tf.keras.layers


@tf.keras.utils.register_keras_serializable(package='Vision')
class ClassificationModel(tf.keras.Model):
  """A classification class builder."""

  def __init__(
      self,
      backbone: tf.keras.Model,
      num_classes: int,
      input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
          shape=[None, None, None, 3]),
      dropout_rate: float = 0.0,
      kernel_initializer: str = 'random_uniform',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      add_head_batch_norm: bool = False,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      skip_logits_layer: bool = False,
      **kwargs):
    """Classification initialization function.

    Args:
      backbone: a backbone network.
      num_classes: `int` number of classes in classification task.
      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
      dropout_rate: `float` rate for dropout regularization.
      kernel_initializer: kernel initializer for the dense layer.
      kernel_regularizer: tf.keras.regularizers.Regularizer object. Defaults
        to None.
      bias_regularizer: tf.keras.regularizers.Regularizer object. Defaults to
        None.
      add_head_batch_norm: `bool` whether to add a batch normalization layer
        before pool.
      use_sync_bn: `bool` if True, use synchronized batch normalization.
      norm_momentum: `float` normalization momentum for the moving average.
      norm_epsilon: `float` small float added to variance to avoid dividing
        by zero.
      skip_logits_layer: `bool`, whether to skip the prediction layer.
      **kwargs: keyword arguments to be passed.
    """
    if use_sync_bn:
      norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm = tf.keras.layers.BatchNormalization
    axis = -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1

    inputs = tf.keras.Input(shape=input_specs.shape[1:],
                            name=input_specs.name)
    endpoints = backbone(inputs)
    x = endpoints[max(endpoints.keys())]

    if add_head_batch_norm:
      x = norm(axis=axis, momentum=norm_momentum, epsilon=norm_epsilon)(x)

    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    if not skip_logits_layer:
      x = tf.keras.layers.Dropout(dropout_rate)(x)
      x = tf.keras.layers.Dense(
          num_classes,
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(x)

    super(ClassificationModel, self).__init__(
        inputs=inputs, outputs=x, **kwargs)
    self._config_dict = {
        'backbone': backbone,
        'num_classes': num_classes,
        'input_specs': input_specs,
        'dropout_rate': dropout_rate,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'add_head_batch_norm': add_head_batch_norm,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
    }
    self._input_specs = input_specs
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._backbone = backbone
    self._norm = norm

  @property
  def checkpoint_items(self) -> Mapping[str, tf.keras.Model]:
    """Returns a dictionary of items to be additionally checkpointed."""
    return dict(backbone=self.backbone)

  @property
  def backbone(self) -> tf.keras.Model:
    return self._backbone

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
official/vision/modeling/classification_model_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for classification network."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.vision.modeling import backbones
from official.vision.modeling import classification_model


class ClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (128, 50, 'relu'),
      (128, 50, 'relu'),
      (128, 50, 'swish'),
  )
  def test_resnet_network_creation(self, input_size, resnet_model_id,
                                   activation):
    """Test for creation of a ResNet-50 classifier."""
    inputs = np.random.rand(2, input_size, input_size, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.ResNet(
        model_id=resnet_model_id, activation=activation)
    self.assertEqual(backbone.count_params(), 23561152)

    num_classes = 1000
    model = classification_model.ClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        dropout_rate=0.2,
    )
    self.assertEqual(model.count_params(), 25610152)

    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  def test_revnet_network_creation(self):
    """Test for creation of a RevNet-56 classifier."""
    revnet_model_id = 56
    inputs = np.random.rand(2, 224, 224, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.RevNet(model_id=revnet_model_id)
    self.assertEqual(backbone.count_params(), 19473792)

    num_classes = 1000
    model = classification_model.ClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        dropout_rate=0.2,
        add_head_batch_norm=True,
    )
    self.assertEqual(model.count_params(), 22816104)

    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  @combinations.generate(
      combinations.combine(
          mobilenet_model_id=[
              'MobileNetV1',
              'MobileNetV2',
              'MobileNetV3Large',
              'MobileNetV3Small',
              'MobileNetV3EdgeTPU',
              'MobileNetMultiAVG',
              'MobileNetMultiMAX',
          ],
          filter_size_scale=[1.0, 0.75],
      ))
  def test_mobilenet_network_creation(self, mobilenet_model_id,
                                      filter_size_scale):
    """Test for creation of a MobileNet classifier."""
    inputs = np.random.rand(2, 224, 224, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.MobileNet(
        model_id=mobilenet_model_id, filter_size_scale=filter_size_scale)

    num_classes = 1001
    model = classification_model.ClassificationModel(
        backbone=backbone,
        num_classes=num_classes,
        dropout_rate=0.2,
    )

    logits = model(inputs)
    self.assertAllEqual([2, num_classes], logits.numpy().shape)

  @combinations.generate(
      combinations.combine(
          strategy=[
              strategy_combinations.cloud_tpu_strategy,
              strategy_combinations.one_device_strategy_gpu,
          ],
          use_sync_bn=[False, True],
      ))
  def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
    """Test for sync bn on TPU and GPU devices."""
    inputs = np.random.rand(64, 128, 128, 3)

    tf.keras.backend.set_image_data_format('channels_last')

    with strategy.scope():
      backbone = backbones.ResNet(model_id=50, use_sync_bn=use_sync_bn)

      model = classification_model.ClassificationModel(
          backbone=backbone,
          num_classes=1000,
          dropout_rate=0.2,
      )
      _ = model(inputs)

  @combinations.generate(
      combinations.combine(
          strategy=[
              strategy_combinations.one_device_strategy_gpu,
          ],
          data_format=['channels_last', 'channels_first'],
          input_dim=[1, 3, 4]))
  def test_data_format_gpu(self, strategy, data_format, input_dim):
    """Test for different data formats on GPU devices."""
    if data_format == 'channels_last':
      inputs = np.random.rand(2, 128, 128, input_dim)
    else:
      inputs = np.random.rand(2, input_dim, 128, 128)
    input_specs = tf.keras.layers.InputSpec(shape=inputs.shape)

    tf.keras.backend.set_image_data_format(data_format)

    with strategy.scope():
      backbone = backbones.ResNet(model_id=50, input_specs=input_specs)

      model = classification_model.ClassificationModel(
          backbone=backbone,
          num_classes=1000,
          input_specs=input_specs,
      )
      _ = model(inputs)

  def test_serialize_deserialize(self):
    """Validate the classification net can be serialized and deserialized."""
    tf.keras.backend.set_image_data_format('channels_last')

    backbone = backbones.ResNet(model_id=50)
    model = classification_model.ClassificationModel(
        backbone=backbone, num_classes=1000)

    config = model.get_config()
    new_model = classification_model.ClassificationModel.from_config(config)

    # Validate that the config can be forced to JSON.
    _ = new_model.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(model.get_config(), new_model.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/decoders/__init__.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Decoders package definition."""
from official.vision.modeling.decoders.aspp import ASPP
from official.vision.modeling.decoders.fpn import FPN
from official.vision.modeling.decoders.nasfpn import NASFPN
official/vision/modeling/decoders/aspp.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of Atrous Spatial Pyramid Pooling (ASPP) decoder."""
from typing import Any, List, Mapping, Optional, Union

# Import libraries
import tensorflow as tf

from official.modeling import hyperparams
from official.vision.modeling.decoders import factory
from official.vision.modeling.layers import deeplab
from official.vision.modeling.layers import nn_layers

TensorMapUnion = Union[tf.Tensor, Mapping[str, tf.Tensor]]


@tf.keras.utils.register_keras_serializable(package='Vision')
class ASPP(tf.keras.layers.Layer):
  """Creates an Atrous Spatial Pyramid Pooling (ASPP) layer."""

  def __init__(
      self,
      level: int,
      dilation_rates: List[int],
      num_filters: int = 256,
      pool_kernel_size: Optional[int] = None,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      activation: str = 'relu',
      dropout_rate: float = 0.0,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      interpolation: str = 'bilinear',
      use_depthwise_convolution: bool = False,
      spp_layer_version: str = 'v1',
      output_tensor: bool = False,
      **kwargs):
    """Initializes an Atrous Spatial Pyramid Pooling (ASPP) layer.

    Args:
      level: An `int` level to apply ASPP.
      dilation_rates: A `list` of dilation rates.
      num_filters: An `int` number of output filters in ASPP.
      pool_kernel_size: A `list` of [height, width] of pooling kernel size or
        None. Pooling size is with respect to original image size; it will be
        scaled down by 2**level. If None, global average pooling is used.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      activation: A `str` activation to be used in ASPP.
      dropout_rate: A `float` rate for dropout regularization.
      kernel_initializer: A `str` name of kernel_initializer for
        convolutional layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Defaults to None.
      interpolation: A `str` of interpolation method. It should be one of
        `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
        `gaussian`, or `mitchellcubic`.
      use_depthwise_convolution: If True, depthwise separable convolutions
        will be added to the atrous spatial pyramid pooling.
      spp_layer_version: A `str` of spatial pyramid pooling layer version.
      output_tensor: Whether to output a single tensor or a dictionary of
        tensors. Defaults to False.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    self._config_dict = {
        'level': level,
        'dilation_rates': dilation_rates,
        'num_filters': num_filters,
        'pool_kernel_size': pool_kernel_size,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'activation': activation,
        'dropout_rate': dropout_rate,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'interpolation': interpolation,
        'use_depthwise_convolution': use_depthwise_convolution,
        'spp_layer_version': spp_layer_version,
        'output_tensor': output_tensor
    }
    self._aspp_layer = deeplab.SpatialPyramidPooling if self._config_dict[
        'spp_layer_version'] == 'v1' else nn_layers.SpatialPyramidPooling

  def build(self, input_shape):
    pool_kernel_size = None
    if self._config_dict['pool_kernel_size']:
      pool_kernel_size = [
          int(p_size // 2 ** self._config_dict['level'])
          for p_size in self._config_dict['pool_kernel_size']
      ]

    self.aspp = self._aspp_layer(
        output_channels=self._config_dict['num_filters'],
        dilation_rates=self._config_dict['dilation_rates'],
        pool_kernel_size=pool_kernel_size,
        use_sync_bn=self._config_dict['use_sync_bn'],
        batchnorm_momentum=self._config_dict['norm_momentum'],
        batchnorm_epsilon=self._config_dict['norm_epsilon'],
        activation=self._config_dict['activation'],
        dropout=self._config_dict['dropout_rate'],
        kernel_initializer=self._config_dict['kernel_initializer'],
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        interpolation=self._config_dict['interpolation'],
        use_depthwise_convolution=self._config_dict[
            'use_depthwise_convolution'])
  def call(self, inputs: TensorMapUnion) -> TensorMapUnion:
    """Calls the Atrous Spatial Pyramid Pooling (ASPP) layer on an input.

    If output_tensor is False, the output of ASPP will be a dict of
    {`level`, `tf.Tensor`} even if only one level is present, so it stays
    compatible with the rest of the segmentation model interfaces.
    If output_tensor is True, a single tensor is output.

    Args:
      inputs: A `tf.Tensor` of shape [batch, height_l, width_l, filter_size]
        or a `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel feature maps.
        - values: A `tf.Tensor` of shape [batch, height_l, width_l,
          filter_size].

    Returns:
      A `tf.Tensor` of shape [batch, height_l, width_l, filter_size] or a
      `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel feature maps.
        - values: A `tf.Tensor` of output of ASPP module.
    """
    outputs = {}
    level = str(self._config_dict['level'])
    backbone_output = inputs[level] if isinstance(inputs, dict) else inputs
    outputs = self.aspp(backbone_output)
    return outputs if self._config_dict['output_tensor'] else {level: outputs}

  def get_config(self) -> Mapping[str, Any]:
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(self._config_dict.items()))

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)


@factory.register_decoder_builder('aspp')
def build_aspp_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds ASPP decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone. Note this is for a consistent
      interface, and is not used by the ASPP decoder.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Defaults
      to None.

  Returns:
    A `tf.keras.Model` instance of the ASPP decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `aspp`.
  """
  del input_specs  # input_specs is not used by ASPP decoder.
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'aspp':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `aspp`.')

  norm_activation_config = model_config.norm_activation
  return ASPP(
      level=decoder_cfg.level,
      dilation_rates=decoder_cfg.dilation_rates,
      num_filters=decoder_cfg.num_filters,
      use_depthwise_convolution=decoder_cfg.use_depthwise_convolution,
      pool_kernel_size=decoder_cfg.pool_kernel_size,
      dropout_rate=decoder_cfg.dropout_rate,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      activation=norm_activation_config.activation,
      kernel_regularizer=l2_regularizer,
      spp_layer_version=decoder_cfg.spp_layer_version,
      output_tensor=decoder_cfg.output_tensor)
official/vision/modeling/decoders/aspp_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for aspp."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import aspp


class ASPPTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (3, [6, 12, 18, 24], 128, 'v1'),
      (3, [6, 12, 18], 128, 'v1'),
      (3, [6, 12], 256, 'v1'),
      (4, [6, 12, 18, 24], 128, 'v2'),
      (4, [6, 12, 18], 128, 'v2'),
      (4, [6, 12], 256, 'v2'),
  )
  def test_network_creation(self, level, dilation_rates, num_filters,
                            spp_layer_version):
    """Test creation of ASPP."""
    input_size = 256
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    backbone = resnet.ResNet(model_id=50)
    network = aspp.ASPP(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        spp_layer_version=spp_layer_version)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    self.assertIn(str(level), feats)
    self.assertAllEqual(
        [1, input_size // 2 ** level, input_size // 2 ** level, num_filters],
        feats[str(level)].shape.as_list())

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        level=3,
        dilation_rates=[6, 12],
        num_filters=256,
        pool_kernel_size=None,
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        activation='relu',
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        interpolation='bilinear',
        dropout_rate=0.2,
        use_depthwise_convolution=False,
        spp_layer_version='v1',
        output_tensor=False,
        dtype='float32',
        name='aspp',
        trainable=True)
    network = aspp.ASPP(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = aspp.ASPP.from_config(network.get_config())

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/decoders/factory.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Decoder registers and factory method.
One can register a new decoder model by the following two steps:
1 Import the factory and register the build in the decoder file.
2 Import the decoder class and add a build in __init__.py.
```
# my_decoder.py
from modeling.decoders import factory
class MyDecoder():
...
@factory.register_decoder_builder('my_decoder')
def build_my_decoder():
return MyDecoder()
# decoders/__init__.py adds import
from modeling.decoders.my_decoder import MyDecoder
```
If one wants the MyDecoder class to be used only by those binary
then don't imported the decoder module in decoders/__init__.py, but import it
in place that uses it.
"""
from typing import Any, Callable, Mapping, Optional, Union

# Import libraries
import tensorflow as tf

from official.core import registry
from official.modeling import hyperparams

_REGISTERED_DECODER_CLS = {}


def register_decoder_builder(key: str) -> Callable[..., Any]:
  """Decorates a builder of decoder class.

  The builder should be a Callable (a class or a function).
  This decorator supports registration of decoder builders as follows:

  ```
  class MyDecoder(tf.keras.Model):
    pass

  @register_decoder_builder('mydecoder')
  def builder(input_specs, config, l2_reg):
    return MyDecoder(...)

  # Builds a MyDecoder object.
  my_decoder = build_decoder(input_specs, config, l2_reg)
  ```

  Args:
    key: A `str` of key to look up the builder.

  Returns:
    A callable for use as a class decorator that registers the decorated
    class for creation from an instance of task_config_cls.
  """
  return registry.register(_REGISTERED_DECODER_CLS, key)


@register_decoder_builder('identity')
def build_identity(
    input_specs: Optional[Mapping[str, tf.TensorShape]] = None,
    model_config: Optional[hyperparams.Config] = None,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> None:
  """Builds identity decoder from a config.

  The input arguments are not used by the identity decoder but are kept here
  to ensure the interface is consistent.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A `OneOfConfig` of model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Defaults to
      None.

  Returns:
    An instance of the identity decoder, i.e. None.
  """
  del input_specs, model_config, l2_regularizer  # Unused by identity decoder.


def build_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None,
    **kwargs
) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds decoder from a config.

  A decoder can be a keras.Model, a keras.layers.Layer, or None. If it is not
  None, the decoder will take features from the backbone as input and
  generate decoded feature maps. If it is None, such as an identity decoder,
  the decoder is skipped and features from the backbone are regarded as model
  output.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A `OneOfConfig` of model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Defaults to
      None.
    **kwargs: Additional keyword args to be passed to the decoder builder.

  Returns:
    An instance of the decoder.
  """
  decoder_builder = registry.lookup(_REGISTERED_DECODER_CLS,
                                    model_config.decoder.type)

  return decoder_builder(
      input_specs=input_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer,
      **kwargs)
official/vision/modeling/decoders/factory_test.py
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for decoder factory functions."""
from absl.testing import parameterized
import tensorflow as tf

from tensorflow.python.distribute import combinations
from official.vision import configs
from official.vision.configs import decoders as decoders_cfg
from official.vision.modeling import decoders
from official.vision.modeling.decoders import factory


class FactoryTest(tf.test.TestCase, parameterized.TestCase):

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          use_separable_conv=[True, False]))
  def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
    """Test creation of FPN decoder."""
    min_level = 3
    max_level = 7
    input_specs = {}
    for level in range(min_level, max_level):
      input_specs[str(level)] = tf.TensorShape(
          [1, 128 // (2**level), 128 // (2**level), 3])

    network = decoders.FPN(
        input_specs=input_specs,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='fpn',
        fpn=decoders_cfg.FPN(
            num_filters=num_filters, use_separable_conv=use_separable_conv))

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()

    self.assertEqual(network_config, factory_network_config)

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          num_repeats=[3, 5],
          use_separable_conv=[True, False]))
  def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
                                   use_separable_conv):
    """Test creation of NASFPN decoder."""
    min_level = 3
    max_level = 7
    input_specs = {}
    for level in range(min_level, max_level):
      input_specs[str(level)] = tf.TensorShape(
          [1, 128 // (2**level), 128 // (2**level), 3])

    network = decoders.NASFPN(
        input_specs=input_specs,
        num_filters=num_filters,
        num_repeats=num_repeats,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='nasfpn',
        nasfpn=decoders_cfg.NASFPN(
            num_filters=num_filters,
            num_repeats=num_repeats,
            use_separable_conv=use_separable_conv))

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()

    self.assertEqual(network_config, factory_network_config)

  @combinations.generate(
      combinations.combine(
          level=[3, 4],
          dilation_rates=[[6, 12, 18], [6, 12]],
          num_filters=[128, 256]))
  def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
    """Test creation of ASPP decoder."""
    input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}

    network = decoders.ASPP(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        use_sync_bn=True)

    model_config = configs.semantic_segmentation.SemanticSegmentationModel()
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='aspp',
        aspp=decoders_cfg.ASPP(
            level=level,
            dilation_rates=dilation_rates,
            num_filters=num_filters))

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()

    # Due to calling `super().get_config()` in the ASPP layer, everything but
    # the names of the two layer instances is the same, so we force the names
    # to be equal to avoid a false alarm.
    factory_network_config['name'] = network_config['name']

    self.assertEqual(network_config, factory_network_config)

  def test_identity_decoder_creation(self):
    """Test creation of identity decoder."""
    model_config = configs.retinanet.RetinaNet()
    model_config.num_classes = 2
    model_config.input_size = [None, None, 3]

    model_config.decoder = decoders_cfg.Decoder(
        type='identity', identity=decoders_cfg.Identity())

    factory_network = factory.build_decoder(
        input_specs=None, model_config=model_config)

    self.assertIsNone(factory_network)


if __name__ == '__main__':
  tf.test.main()
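
Reviewer note: for orientation, a minimal sketch of driving the same factory
entry point outside a test. It assumes only the config fields exercised in the
tests above; the input shapes are illustrative, not required values.

import tensorflow as tf
from official.vision import configs
from official.vision.configs import decoders as decoders_cfg
from official.vision.modeling.decoders import factory

# Any {level: TensorShape} mapping from a backbone's `output_specs` works.
input_specs = {
    str(level): tf.TensorShape([1, 128 // 2**level, 128 // 2**level, 3])
    for level in range(3, 7)
}

model_config = configs.retinanet.RetinaNet()
model_config.min_level = 3
model_config.max_level = 7
model_config.input_size = [None, None, 3]
model_config.decoder = decoders_cfg.Decoder(
    type='fpn', fpn=decoders_cfg.FPN(num_filters=256))

decoder = factory.build_decoder(
    input_specs=input_specs, model_config=model_config)
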
official/vision/modeling/decoders/fpn.py
0 → 100644
View file @ 0225b135
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of Feature Pyramid Networks (FPN)."""
from typing import Any, Mapping, Optional

# Import libraries
from absl import logging
import tensorflow as tf

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.decoders import factory
from official.vision.ops import spatial_transform_ops


@tf.keras.utils.register_keras_serializable(package='Vision')
class FPN(tf.keras.Model):
  """Creates a Feature Pyramid Network (FPN).

  This implements the paper:
  Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan,
  and Serge Belongie.
  Feature Pyramid Networks for Object Detection.
  (https://arxiv.org/pdf/1612.03144)
  """

  def __init__(
      self,
      input_specs: Mapping[str, tf.TensorShape],
      min_level: int = 3,
      max_level: int = 7,
      num_filters: int = 256,
      fusion_type: str = 'sum',
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a Feature Pyramid Network (FPN).

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      num_filters: An `int` number of filters in FPN layers.
      fusion_type: A `str` of `sum` or `concat`. Whether performing sum or
        concat for feature fusion.
      use_separable_conv: A `bool`. If True use separable convolution for
        convolution in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._config_dict = {
        'input_specs': input_specs,
        'min_level': min_level,
        'max_level': max_level,
        'num_filters': num_filters,
        'fusion_type': fusion_type,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    if use_separable_conv:
      conv2d = tf.keras.layers.SeparableConv2D
    else:
      conv2d = tf.keras.layers.Conv2D
    if use_sync_bn:
      norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm = tf.keras.layers.BatchNormalization
    activation_fn = tf.keras.layers.Activation(
        tf_utils.get_activation(activation))

    # Build input feature pyramid.
    if tf.keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Get input feature pyramid from backbone.
    logging.info('FPN input_specs: %s', input_specs)
    inputs = self._build_input_pyramid(input_specs, min_level)
    backbone_max_level = min(int(max(inputs.keys())), max_level)

    # Build lateral connections.
    feats_lateral = {}
    for level in range(min_level, backbone_max_level + 1):
      feats_lateral[str(level)] = conv2d(
          filters=num_filters,
          kernel_size=1,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(
              inputs[str(level)])

    # Build top-down path.
    feats = {str(backbone_max_level): feats_lateral[str(backbone_max_level)]}
    for level in range(backbone_max_level - 1, min_level - 1, -1):
      feat_a = spatial_transform_ops.nearest_upsampling(
          feats[str(level + 1)], 2)
      feat_b = feats_lateral[str(level)]

      if fusion_type == 'sum':
        feats[str(level)] = feat_a + feat_b
      elif fusion_type == 'concat':
        feats[str(level)] = tf.concat([feat_a, feat_b], axis=-1)
      else:
        raise ValueError('Fusion type {} not supported.'.format(fusion_type))

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build post-hoc 3x3 convolution kernel.
    for level in range(min_level, backbone_max_level + 1):
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=1,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(
              feats[str(level)])

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build coarser FPN levels introduced for RetinaNet.
    for level in range(backbone_max_level + 1, max_level + 1):
      feats_in = feats[str(level - 1)]
      if level > backbone_max_level + 1:
        feats_in = activation_fn(feats_in)
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=2,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(
              feats_in)

    # Apply batch norm layers.
    for level in range(min_level, max_level + 1):
      feats[str(level)] = norm(
          axis=bn_axis,
          momentum=norm_momentum,
          epsilon=norm_epsilon)(
              feats[str(level)])

    self._output_specs = {
        str(level): feats[str(level)].get_shape()
        for level in range(min_level, max_level + 1)
    }

    super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
    assert isinstance(input_specs, dict)
    if min(input_specs.keys()) > str(min_level):
      raise ValueError(
          'Backbone min level should be less or equal to FPN min level')

    inputs = {}
    for level, spec in input_specs.items():
      inputs[level] = tf.keras.Input(shape=spec[1:])
    return inputs

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
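
Reviewer note: before the registered builder below, a hedged sketch of direct
construction. ResNet-50 is an arbitrary example backbone; any backbone in this
repository that exposes `output_specs` should wire up the same way.

import tensorflow as tf
from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn

backbone = resnet.ResNet(model_id=50)
decoder = fpn.FPN(
    input_specs=backbone.output_specs, min_level=3, max_level=7)

images = tf.keras.Input(shape=(256, 256, 3), batch_size=1)
endpoints = backbone(images)
# A dict {'3': ..., ..., '7': ...}; `decoder.output_specs` gives the shapes.
feats = decoder(endpoints)
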
@factory.register_decoder_builder('fpn')
def build_fpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds FPN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the FPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `fpn`.
  """
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'fpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `fpn`.')
  norm_activation_config = model_config.norm_activation
  return FPN(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=decoder_cfg.num_filters,
      fusion_type=decoder_cfg.fusion_type,
      use_separable_conv=decoder_cfg.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
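
Reviewer note: `register_decoder_builder` is what connects the factory test at
the top of this commit to this module. A hedged sketch of registering a custom
decoder under a new key; the key `my_decoder` and the builder body are invented
purely for illustration.

from typing import Mapping, Optional
import tensorflow as tf
from official.modeling import hyperparams
from official.vision.modeling.decoders import factory


@factory.register_decoder_builder('my_decoder')  # 'my_decoder' is invented.
def build_my_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> Optional[tf.keras.Model]:
  """Toy builder; a real one would read `model_config.decoder.get()`."""
  del model_config, l2_regularizer  # Unused in this sketch.
  inputs = {k: tf.keras.Input(shape=v[1:]) for k, v in input_specs.items()}
  return tf.keras.Model(inputs=inputs, outputs=inputs)  # Pass-through decoder.
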
official/vision/modeling/decoders/fpn_test.py
0 → 100644
View file @ 0225b135
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import mobilenet
from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn


class FPNTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (256, 3, 7, False, 'sum'),
      (256, 3, 7, True, 'concat'),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv, fusion_type):
    """Test creation of FPN."""
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    backbone = resnet.ResNet(model_id=50)
    network = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        fusion_type=fusion_type,
        use_separable_conv=use_separable_conv)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      self.assertAllEqual(
          [1, input_size // 2**level, input_size // 2**level, 256],
          feats[str(level)].shape.as_list())

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation_with_mobilenet(self, input_size, min_level,
                                           max_level, use_separable_conv):
    """Test creation of FPN with mobilenet backbone."""
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    backbone = mobilenet.MobileNet(model_id='MobileNetV2')
    network = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        use_separable_conv=use_separable_conv)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      self.assertAllEqual(
          [1, input_size // 2**level, input_size // 2**level, 256],
          feats[str(level)].shape.as_list())

  def test_serialize_deserialize(self):
    # Create a network object that sets all of its config options.
    kwargs = dict(
        input_specs=resnet.ResNet(model_id=50).output_specs,
        min_level=3,
        max_level=7,
        num_filters=256,
        fusion_type='sum',
        use_separable_conv=False,
        use_sync_bn=False,
        activation='relu',
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_initializer='VarianceScaling',
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    network = fpn.FPN(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = fpn.FPN.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())


if __name__ == '__main__':
  tf.test.main()
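
Reviewer note: the `register_keras_serializable(package='Vision')` decorator on
`FPN` is what makes the round trip above work; a condensed, hedged version of
the same check.

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn

network = fpn.FPN(input_specs=resnet.ResNet(model_id=50).output_specs)
_ = network.to_json()  # JSON-serializable via the 'Vision>FPN' registration.
restored = fpn.FPN.from_config(network.get_config())
assert network.get_config() == restored.get_config()
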
official/vision/modeling/decoders/nasfpn.py
0 → 100644
View file @ 0225b135
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of NAS-FPN."""
from typing import Any, List, Mapping, Optional, Tuple

# Import libraries
from absl import logging
import tensorflow as tf

from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.decoders import factory
from official.vision.ops import spatial_transform_ops

# The fixed NAS-FPN architecture discovered by NAS.
# Each element represents a specification of a building block:
#   (block_level, combine_fn, (input_offset0, input_offset1), is_output).
NASFPN_BLOCK_SPECS = [
    (4, 'attention', (1, 3), False),
    (4, 'sum', (1, 5), False),
    (3, 'sum', (0, 6), True),
    (4, 'sum', (6, 7), True),
    (5, 'attention', (7, 8), True),
    (7, 'attention', (6, 9), True),
    (6, 'attention', (9, 10), True),
]


class BlockSpec():
  """A container class that specifies the block configuration for NAS-FPN."""

  def __init__(self, level: int, combine_fn: str,
               input_offsets: Tuple[int, int], is_output: bool):
    self.level = level
    self.combine_fn = combine_fn
    self.input_offsets = input_offsets
    self.is_output = is_output


def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Builds the list of BlockSpec objects for NAS-FPN."""
  if not block_specs:
    block_specs = NASFPN_BLOCK_SPECS
  logging.info('Building NAS-FPN block specs: %s', block_specs)
  return [BlockSpec(*b) for b in block_specs]
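
Reviewer note: a hedged reading of the spec format above. The two-block
topology below is invented for illustration; offsets index into the running
feature list, which starts as [min_level, ..., max_level] and grows by one
entry per block.

from official.vision.modeling.decoders import nasfpn

# Default architecture: e.g. (4, 'attention', (1, 3), False) builds a level-4
# node by attention-combining the features at offsets 1 and 3 (not an output).
default_specs = nasfpn.build_block_specs()

# Invented topology for min_level=3, max_level=4 (initial offsets 0 and 1):
custom_specs = nasfpn.build_block_specs([
    (3, 'sum', (0, 1), True),        # appended at offset 2: level-3 output
    (4, 'attention', (1, 2), True),  # appended at offset 3: level-4 output
])
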
@tf.keras.utils.register_keras_serializable(package='Vision')
class NASFPN(tf.keras.Model):
  """Creates a NAS-FPN model.

  This implements the paper:
  Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le.
  NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object
  Detection.
  (https://arxiv.org/abs/1904.07392)
  """

  def __init__(self,
               input_specs: Mapping[str, tf.TensorShape],
               min_level: int = 3,
               max_level: int = 7,
               block_specs: List[BlockSpec] = build_block_specs(),
               num_filters: int = 256,
               num_repeats: int = 5,
               use_separable_conv: bool = False,
               activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               kernel_initializer: str = 'VarianceScaling',
               kernel_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               bias_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               **kwargs):
    """Initializes a NAS-FPN model.

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      block_specs: a list of BlockSpec objects that specifies the NAS-FPN
        network topology. By default, the previously discovered architecture
        is used.
      num_filters: An `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network.
      use_separable_conv: A `bool`. If True use separable convolution for
        convolution in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    self._config_dict = {
        'input_specs': input_specs,
        'min_level': min_level,
        'max_level': max_level,
        'num_filters': num_filters,
        'num_repeats': num_repeats,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    self._min_level = min_level
    self._max_level = max_level
    self._block_specs = block_specs
    self._num_repeats = num_repeats
    self._conv_op = (tf.keras.layers.SeparableConv2D
                     if self._config_dict['use_separable_conv']
                     else tf.keras.layers.Conv2D)
    if self._config_dict['use_separable_conv']:
      self._conv_kwargs = {
          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      }
    else:
      self._conv_kwargs = {
          'kernel_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      }
    self._norm_op = (tf.keras.layers.experimental.SyncBatchNormalization
                     if self._config_dict['use_sync_bn']
                     else tf.keras.layers.BatchNormalization)
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._norm_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }
    self._activation = tf_utils.get_activation(activation)

    # Gets input feature pyramid from backbone.
    inputs = self._build_input_pyramid(input_specs, min_level)

    # Projects the input features.
    feats = []
    for level in range(self._min_level, self._max_level + 1):
      if str(level) in inputs.keys():
        feats.append(self._resample_feature_map(
            inputs[str(level)], level, level,
            self._config_dict['num_filters']))
      else:
        feats.append(self._resample_feature_map(
            feats[-1], level - 1, level, self._config_dict['num_filters']))

    # Repeatedly builds the NAS-FPN modules.
    for _ in range(self._num_repeats):
      output_feats = self._build_feature_pyramid(feats)
      feats = [output_feats[level]
               for level in range(self._min_level, self._max_level + 1)]

    self._output_specs = {
        str(level): output_feats[level].get_shape()
        for level in range(min_level, max_level + 1)
    }
    output_feats = {str(level): output_feats[level]
                    for level in output_feats.keys()}
    super(NASFPN, self).__init__(inputs=inputs, outputs=output_feats, **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
    assert isinstance(input_specs, dict)
    if min(input_specs.keys()) > str(min_level):
      raise ValueError(
          'Backbone min level should be less or equal to FPN min level')

    inputs = {}
    for level, spec in input_specs.items():
      inputs[level] = tf.keras.Input(shape=spec[1:])
    return inputs

  def _resample_feature_map(self,
                            inputs,
                            input_level,
                            target_level,
                            target_num_filters=256):
    x = inputs
    _, _, _, input_num_filters = x.get_shape().as_list()
    if input_num_filters != target_num_filters:
      x = self._conv_op(
          filters=target_num_filters,
          kernel_size=1,
          padding='same',
          **self._conv_kwargs)(x)
      x = self._norm_op(**self._norm_kwargs)(x)

    if input_level < target_level:
      stride = int(2 ** (target_level - input_level))
      return tf.keras.layers.MaxPool2D(
          pool_size=stride, strides=stride, padding='same')(x)
    if input_level > target_level:
      scale = int(2 ** (input_level - target_level))
      return spatial_transform_ops.nearest_upsampling(x, scale=scale)

    # Force output x to be the same dtype as mixed precision policy. This
    # avoids dtype mismatch when one input (by default float32 dtype) does not
    # meet all the above conditions and is output unchanged, while other
    # inputs are processed to have different dtype, e.g., using bfloat16 on
    # TPU.
    compute_dtype = tf.keras.layers.Layer().dtype_policy.compute_dtype
    if (compute_dtype is not None) and (x.dtype != compute_dtype):
      return tf.cast(x, dtype=compute_dtype)
    else:
      return x

  def _global_attention(self, feat0, feat1):
    m = tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True)
    m = tf.math.sigmoid(m)
    return feat0 + feat1 * m

  def _build_feature_pyramid(self, feats):
    num_output_connections = [0] * len(feats)
    num_output_levels = self._max_level - self._min_level + 1
    feat_levels = list(range(self._min_level, self._max_level + 1))

    for i, block_spec in enumerate(self._block_specs):
      new_level = block_spec.level

      # Checks the range of input_offsets.
      for input_offset in block_spec.input_offsets:
        if input_offset >= len(feats):
          raise ValueError(
              'input_offset ({}) is larger than num feats({})'.format(
                  input_offset, len(feats)))
      input0 = block_spec.input_offsets[0]
      input1 = block_spec.input_offsets[1]

      # Update graph with inputs.
      node0 = feats[input0]
      node0_level = feat_levels[input0]
      num_output_connections[input0] += 1
      node0 = self._resample_feature_map(node0, node0_level, new_level)
      node1 = feats[input1]
      node1_level = feat_levels[input1]
      num_output_connections[input1] += 1
      node1 = self._resample_feature_map(node1, node1_level, new_level)

      # Combine node0 and node1 to create new feat.
      if block_spec.combine_fn == 'sum':
        new_node = node0 + node1
      elif block_spec.combine_fn == 'attention':
        if node0_level >= node1_level:
          new_node = self._global_attention(node0, node1)
        else:
          new_node = self._global_attention(node1, node0)
      else:
        raise ValueError('unknown combine_fn `{}`.'.format(
            block_spec.combine_fn))

      # Add intermediate nodes that do not have any connections to output.
      if block_spec.is_output:
        for j, (feat, feat_level, num_output) in enumerate(
            zip(feats, feat_levels, num_output_connections)):
          if num_output == 0 and feat_level == new_level:
            num_output_connections[j] += 1

            feat_ = self._resample_feature_map(feat, feat_level, new_level)
            new_node += feat_

      new_node = self._activation(new_node)
      new_node = self._conv_op(
          filters=self._config_dict['num_filters'],
          kernel_size=(3, 3),
          padding='same',
          **self._conv_kwargs)(new_node)
      new_node = self._norm_op(**self._norm_kwargs)(new_node)
      feats.append(new_node)
      feat_levels.append(new_level)
      num_output_connections.append(0)

    output_feats = {}
    for i in range(len(feats) - num_output_levels, len(feats)):
      level = feat_levels[i]
      output_feats[level] = feats[i]
    logging.info('Output feature pyramid: %s', output_feats)
    return output_feats

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs


@factory.register_decoder_builder('nasfpn')
def build_nasfpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds NASFPN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the NASFPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `nasfpn`.
  """
  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'nasfpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `nasfpn`.')
  norm_activation_config = model_config.norm_activation
  return NASFPN(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=decoder_cfg.num_filters,
      num_repeats=decoder_cfg.num_repeats,
      use_separable_conv=decoder_cfg.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
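
Reviewer note: the `attention` combine is the main departure from plain
summation, so here is a self-contained sketch of what `_global_attention`
computes on dummy tensors. Shapes are illustrative; in the model both inputs
have already been resampled to the target level.

import tensorflow as tf

feat0 = tf.random.normal([1, 16, 16, 256])  # feature from the higher level
feat1 = tf.random.normal([1, 16, 16, 256])  # feature from the lower level

# Gate feat1 by a sigmoid of feat0's global max pool, then add:
m = tf.math.sigmoid(tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True))
fused = feat0 + feat1 * m  # same result as NASFPN._global_attention(feat0, feat1)
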
official/vision/modeling/decoders/nasfpn_test.py
0 → 100644
View file @ 0225b135
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for NAS-FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import nasfpn


class NASFPNTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv):
    """Test creation of NAS-FPN."""
    tf.keras.backend.set_image_data_format('channels_last')

    inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    num_filters = 256
    backbone = resnet.ResNet(model_id=50)
    network = nasfpn.NASFPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv)

    endpoints = backbone(inputs)
    feats = network(endpoints)

    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), feats)
      self.assertAllEqual(
          [1, input_size // 2**level, input_size // 2**level, num_filters],
          feats[str(level)].shape.as_list())


if __name__ == '__main__':
  tf.test.main()