Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
7785dec0
Commit
7785dec0
authored
Feb 01, 2022
by
Yeqing Li
Committed by
A. Unique TensorFlower
Feb 01, 2022
Browse files
Internal change
PiperOrigin-RevId: 425740068
parent
9c93f07c
Changes
72
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
6564 deletions
+0
-6564
official/vision/modeling/heads/instance_heads_test.py
official/vision/modeling/heads/instance_heads_test.py
+0
-135
official/vision/modeling/heads/segmentation_heads.py
official/vision/modeling/heads/segmentation_heads.py
+0
-441
official/vision/modeling/heads/segmentation_heads_test.py
official/vision/modeling/heads/segmentation_heads_test.py
+0
-107
official/vision/modeling/layers/__init__.py
official/vision/modeling/layers/__init__.py
+0
-44
official/vision/modeling/layers/box_sampler.py
official/vision/modeling/layers/box_sampler.py
+0
-93
official/vision/modeling/layers/box_sampler_test.py
official/vision/modeling/layers/box_sampler_test.py
+0
-69
official/vision/modeling/layers/deeplab.py
official/vision/modeling/layers/deeplab.py
+0
-211
official/vision/modeling/layers/deeplab_test.py
official/vision/modeling/layers/deeplab_test.py
+0
-53
official/vision/modeling/layers/detection_generator.py
official/vision/modeling/layers/detection_generator.py
+0
-852
official/vision/modeling/layers/detection_generator_test.py
official/vision/modeling/layers/detection_generator_test.py
+0
-249
official/vision/modeling/layers/mask_sampler.py
official/vision/modeling/layers/mask_sampler.py
+0
-166
official/vision/modeling/layers/mask_sampler_test.py
official/vision/modeling/layers/mask_sampler_test.py
+0
-137
official/vision/modeling/layers/nn_blocks.py
official/vision/modeling/layers/nn_blocks.py
+0
-1511
official/vision/modeling/layers/nn_blocks_3d.py
official/vision/modeling/layers/nn_blocks_3d.py
+0
-286
official/vision/modeling/layers/nn_blocks_3d_test.py
official/vision/modeling/layers/nn_blocks_3d_test.py
+0
-59
official/vision/modeling/layers/nn_blocks_test.py
official/vision/modeling/layers/nn_blocks_test.py
+0
-341
official/vision/modeling/layers/nn_layers.py
official/vision/modeling/layers/nn_layers.py
+0
-1277
official/vision/modeling/layers/nn_layers_test.py
official/vision/modeling/layers/nn_layers_test.py
+0
-419
official/vision/modeling/layers/roi_aligner.py
official/vision/modeling/layers/roi_aligner.py
+0
-72
official/vision/modeling/layers/roi_aligner_test.py
official/vision/modeling/layers/roi_aligner_test.py
+0
-42
No files found.
official/vision/modeling/heads/instance_heads_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for instance_heads.py."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.modeling.heads
import
instance_heads
class DetectionHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `instance_heads.DetectionHead`."""

  @parameterized.parameters(
      (0, 0, False, False),
      (0, 1, False, False),
      (1, 0, False, False),
      (1, 1, False, False),
  )
  def test_forward(self, num_convs, num_fcs, use_separable_conv, use_sync_bn):
    """Checks the shapes of the class-score and box outputs."""
    head = instance_heads.DetectionHead(
        num_classes=3,
        num_convs=num_convs,
        num_filters=16,
        use_separable_conv=use_separable_conv,
        num_fcs=num_fcs,
        fc_dims=4,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    roi_features = np.random.rand(2, 10, 128, 128, 16)
    scores, boxes = head(roi_features)
    # 3 classes -> [batch, rois, 3] scores and 4 box coords per class -> 12.
    self.assertAllEqual(scores.numpy().shape, [2, 10, 3])
    self.assertAllEqual(boxes.numpy().shape, [2, 10, 12])

  def test_serialize_deserialize(self):
    """Round-trips the head through get_config/from_config."""
    head = instance_heads.DetectionHead(
        num_classes=91,
        num_convs=0,
        num_filters=256,
        use_separable_conv=False,
        num_fcs=2,
        fc_dims=1024,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    restored = instance_heads.DetectionHead.from_config(head.get_config())
    self.assertAllEqual(head.get_config(), restored.get_config())
class MaskHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `instance_heads.MaskHead`."""

  @parameterized.parameters(
      (1, 1, False),
      (1, 2, False),
      (2, 1, False),
      (2, 2, False),
  )
  def test_forward(self, upsample_factor, num_convs, use_sync_bn):
    """Checks the predicted mask shape for several configurations."""
    head = instance_heads.MaskHead(
        num_classes=3,
        upsample_factor=upsample_factor,
        num_convs=num_convs,
        num_filters=16,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    roi_features = np.random.rand(2, 10, 14, 14, 16)
    roi_classes = np.zeros((2, 10))
    masks = head([roi_features, roi_classes])
    # The 14x14 RoI feature is upsampled by `upsample_factor` on each side.
    expected_side = 14 * upsample_factor
    self.assertAllEqual(masks.numpy().shape,
                        [2, 10, expected_side, expected_side])

  def test_serialize_deserialize(self):
    """Round-trips the head through get_config/from_config."""
    head = instance_heads.MaskHead(
        num_classes=3,
        upsample_factor=2,
        num_convs=1,
        num_filters=256,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    restored = instance_heads.MaskHead.from_config(head.get_config())
    self.assertAllEqual(head.get_config(), restored.get_config())

  def test_forward_class_agnostic(self):
    """Checks that class-agnostic mode still emits one mask per RoI."""
    head = instance_heads.MaskHead(num_classes=3, class_agnostic=True)
    roi_features = np.random.rand(2, 10, 14, 14, 16)
    roi_classes = np.zeros((2, 10))
    masks = head([roi_features, roi_classes])
    self.assertAllEqual(masks.numpy().shape, [2, 10, 28, 28])
# Standard TensorFlow test entry point.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/heads/segmentation_heads.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of segmentation heads."""
from
typing
import
List
,
Union
,
Optional
,
Mapping
,
Tuple
,
Any
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.vision.modeling.layers
import
nn_layers
from
official.vision.ops
import
spatial_transform_ops
class MaskScoring(tf.keras.Model):
  """Mask scoring layer.

  This implements the mask scoring layer from the paper:

  Zhaojin Huang, Lichao Huang, Yongchao Gong, Chang Huang, Xinggang Wang.
  Mask Scoring R-CNN.
  (https://arxiv.org/pdf/1903.00241.pdf)
  """

  def __init__(self,
               num_classes: int,
               fc_input_size: List[int],
               num_convs: int = 3,
               num_filters: int = 256,
               fc_dims: int = 1024,
               num_fcs: int = 2,
               activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               kernel_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               bias_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               **kwargs):
    """Initializes mask scoring layer.

    Args:
      num_classes: An `int` for number of classes.
      fc_input_size: A List of `int` for the input size of the fully connected
        layers.
      num_convs: An `int` for number of conv layers.
      num_filters: An `int` for the number of filters for conv layers.
      fc_dims: An `int` number of filters for each fully connected layer.
      num_fcs: An `int` for number of fully connected layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`, whether or not to use sync batch normalization.
      norm_momentum: A `float` for the momentum in BatchNorm. Defaults to 0.99.
      norm_epsilon: A `float` for the epsilon value in BatchNorm. Defaults to
        0.001.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    self._config_dict = {
        'num_classes': num_classes,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'fc_input_size': fc_input_size,
        'fc_dims': fc_dims,
        'num_fcs': num_fcs,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'activation': activation,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    # BatchNorm normalizes over the channel axis, whose position depends on
    # the configured image data format.
    channels_last = tf.keras.backend.image_data_format() == 'channels_last'
    self._bn_axis = -1 if channels_last else 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the mask scoring head."""
    conv_op = tf.keras.layers.Conv2D
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
        'kernel_initializer': tf.keras.initializers.VarianceScaling(
            scale=2, mode='fan_out', distribution='untruncated_normal'),
        'bias_initializer': tf.zeros_initializer(),
        'kernel_regularizer': self._config_dict['kernel_regularizer'],
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    # Conv stack applied to the input segmentation logits.
    self._convs = []
    self._conv_norms = []
    for i in range(self._config_dict['num_convs']):
      self._convs.append(
          conv_op(name='mask-scoring_{}'.format(i), **conv_kwargs))
      self._conv_norms.append(
          bn_op(name='mask-scoring-bn_{}'.format(i), **bn_kwargs))

    # Fully connected stack applied to the flattened, resized features.
    self._fcs = []
    self._fc_norms = []
    for i in range(self._config_dict['num_fcs']):
      self._fcs.append(
          tf.keras.layers.Dense(
              units=self._config_dict['fc_dims'],
              kernel_initializer=tf.keras.initializers.VarianceScaling(
                  scale=1 / 3.0, mode='fan_out', distribution='uniform'),
              kernel_regularizer=self._config_dict['kernel_regularizer'],
              bias_regularizer=self._config_dict['bias_regularizer'],
              name='mask-scoring-fc_{}'.format(i)))
      self._fc_norms.append(
          bn_op(name='mask-scoring-fc-bn_{}'.format(i), **bn_kwargs))

    # Final linear layer that emits one IoU score per class.
    self._classifier = tf.keras.layers.Dense(
        units=self._config_dict['num_classes'],
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'],
        name='iou-scores')

    super().build(input_shape)

  def call(self, inputs: tf.Tensor, training: bool = None):
    """Forward pass of the mask scoring head.

    Args:
      inputs: A `tf.Tensor` of the shape [batch_size, height, width,
        num_classes], representing the segmentation logits.
        NOTE(review): the original docstring said "[batch_size, width, size,
        num_classes]"; a rank-4 feature map is assumed — confirm with callers.
      training: A `bool` indicating whether it is in `training` mode.

    Returns:
      mask_scores: A `tf.Tensor` of predicted mask scores
        [batch_size, num_classes].
    """
    # The scores must not backpropagate into the segmentation logits.
    x = tf.stop_gradient(inputs)
    for conv, bn in zip(self._convs, self._conv_norms):
      x = self._activation(bn(conv(x)))

    # Casts feat to float32 so the resize op can be run on TPU.
    x = tf.cast(x, tf.float32)
    x = tf.image.resize(
        x,
        size=self._config_dict['fc_input_size'],
        method=tf.image.ResizeMethod.BILINEAR)
    # Casts it back to be compatible with the rest of the operations.
    x = tf.cast(x, inputs.dtype)

    # Flatten spatial dims and channels before the fully connected stack.
    _, h, w, filters = x.get_shape().as_list()
    x = tf.reshape(x, [-1, h * w * filters])

    for fc, bn in zip(self._fcs, self._fc_norms):
      x = self._activation(bn(fc(x)))

    return self._classifier(x)

  def get_config(self) -> Mapping[str, Any]:
    """Returns the layer configuration."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Creates a layer from its configuration."""
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class SegmentationHead(tf.keras.layers.Layer):
  """Creates a segmentation head."""

  def __init__(self,
               num_classes: int,
               level: Union[int, str],
               num_convs: int = 2,
               num_filters: int = 256,
               use_depthwise_convolution: bool = False,
               prediction_kernel_size: int = 1,
               upsample_factor: int = 1,
               feature_fusion: Optional[str] = None,
               decoder_min_level: Optional[int] = None,
               decoder_max_level: Optional[int] = None,
               low_level: int = 2,
               low_level_num_filters: int = 48,
               num_decoder_filters: int = 256,
               activation: str = 'relu',
               use_sync_bn: bool = False,
               norm_momentum: float = 0.99,
               norm_epsilon: float = 0.001,
               kernel_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               bias_regularizer: Optional[
                   tf.keras.regularizers.Regularizer] = None,
               **kwargs):
    """Initializes a segmentation head.

    Args:
      num_classes: An `int` number of mask classification categories. The
        number of classes does not include background class.
      level: An `int` or `str`, level to use to build segmentation head.
      num_convs: An `int` number of stacked convolution before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      use_depthwise_convolution: A bool to specify if use depthwise separable
        convolutions.
      prediction_kernel_size: An `int` number to specify the kernel size of
        the prediction layer.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`,
        `panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from
        decoder_features[level] will be fused with low level feature maps from
        backbone. If `pyramid_fusion`, multiscale features will be resized and
        fused at the target level.
      decoder_min_level: An `int` of minimum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      decoder_max_level: An `int` of maximum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      low_level: An `int` of backbone level to be used for feature fusion. It
        is used when feature_fusion is set to `deeplabv3plus`.
      low_level_num_filters: An `int` of reduced number of filters for the low
        level features before fusing it with higher level features. It is only
        used when feature_fusion is set to `deeplabv3plus`.
      num_decoder_filters: An `int` of number of filters in the decoder
        outputs. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      activation: A `str` that indicates which activation is used, e.g.
        'relu', 'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    self._config_dict = {
        'num_classes': num_classes,
        'level': level,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_depthwise_convolution': use_depthwise_convolution,
        'prediction_kernel_size': prediction_kernel_size,
        'upsample_factor': upsample_factor,
        'feature_fusion': feature_fusion,
        'decoder_min_level': decoder_min_level,
        'decoder_max_level': decoder_max_level,
        'low_level': low_level,
        'low_level_num_filters': low_level_num_filters,
        'num_decoder_filters': num_decoder_filters,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer
    }
    # BatchNorm normalizes over the channel axis, whose position depends on
    # the configured image data format.
    channels_last = tf.keras.backend.image_data_format() == 'channels_last'
    self._bn_axis = -1 if channels_last else 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the segmentation head."""
    use_depthwise_convolution = self._config_dict['use_depthwise_convolution']
    random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)
    conv_op = tf.keras.layers.Conv2D
    conv_kwargs = {
        # With a depthwise conv providing the 3x3 receptive field, the
        # following pointwise conv only needs a 1x1 kernel.
        'kernel_size': 3 if not use_depthwise_convolution else 1,
        'padding': 'same',
        'use_bias': False,
        'kernel_initializer': random_initializer,
        'kernel_regularizer': self._config_dict['kernel_regularizer'],
    }
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    if self._config_dict['feature_fusion'] == 'deeplabv3plus':
      # Deeplabv3+ feature fusion layers.
      self._dlv3p_conv = conv_op(
          kernel_size=1,
          padding='same',
          use_bias=False,
          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
          kernel_regularizer=self._config_dict['kernel_regularizer'],
          name='segmentation_head_deeplabv3p_fusion_conv',
          filters=self._config_dict['low_level_num_filters'])
      self._dlv3p_norm = bn_op(
          name='segmentation_head_deeplabv3p_fusion_norm', **bn_kwargs)
    elif self._config_dict['feature_fusion'] == 'panoptic_fpn_fusion':
      self._panoptic_fpn_fusion = nn_layers.PanopticFPNFusion(
          min_level=self._config_dict['decoder_min_level'],
          max_level=self._config_dict['decoder_max_level'],
          target_level=self._config_dict['level'],
          num_filters=self._config_dict['num_filters'],
          num_fpn_filters=self._config_dict['num_decoder_filters'],
          activation=self._config_dict['activation'],
          kernel_regularizer=self._config_dict['kernel_regularizer'],
          bias_regularizer=self._config_dict['bias_regularizer'])

    # Segmentation head layers.
    self._convs = []
    self._norms = []
    for i in range(self._config_dict['num_convs']):
      if use_depthwise_convolution:
        self._convs.append(
            tf.keras.layers.DepthwiseConv2D(
                name='segmentation_head_depthwise_conv_{}'.format(i),
                kernel_size=3,
                padding='same',
                use_bias=False,
                depthwise_initializer=random_initializer,
                depthwise_regularizer=self._config_dict['kernel_regularizer'],
                depth_multiplier=1))
        self._norms.append(
            bn_op(name='segmentation_head_depthwise_norm_{}'.format(i),
                  **bn_kwargs))
      self._convs.append(
          conv_op(
              name='segmentation_head_conv_{}'.format(i),
              filters=self._config_dict['num_filters'],
              **conv_kwargs))
      self._norms.append(
          bn_op(name='segmentation_head_norm_{}'.format(i), **bn_kwargs))

    # Final per-pixel classifier producing `num_classes` logits.
    self._classifier = conv_op(
        name='segmentation_output',
        filters=self._config_dict['num_classes'],
        kernel_size=self._config_dict['prediction_kernel_size'],
        padding='same',
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'])

    super().build(input_shape)

  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
                               Union[tf.Tensor, Mapping[str, tf.Tensor]]]):
    """Forward pass of the segmentation head.

    It supports both a tuple of 2 tensors or 2 dictionaries. The first is
    backbone endpoints, and the second is decoder endpoints. When inputs are
    tensors, they are from a single level of feature maps. When inputs are
    dictionaries, they contain multiple levels of feature maps, where the key
    is the index of feature map.

    Args:
      inputs: A tuple of 2 feature map tensors of shape
        [batch, height_l, width_l, channels] or 2 dictionaries of tensors:
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].
        The first is backbone endpoints, and the second is decoder endpoints.

    Returns:
      segmentation prediction mask: A `tf.Tensor` of the segmentation mask
        scores predicted from input features.
    """
    backbone_output = inputs[0]
    decoder_output = inputs[1]
    fusion = self._config_dict['feature_fusion']
    if fusion == 'deeplabv3plus':
      # deeplabv3+ feature fusion: project the low-level backbone feature,
      # resize the decoder feature to match it, then concatenate.
      x = (decoder_output[str(self._config_dict['level'])]
           if isinstance(decoder_output, dict) else decoder_output)
      y = (backbone_output[str(self._config_dict['low_level'])]
           if isinstance(backbone_output, dict) else backbone_output)
      y = self._dlv3p_norm(self._dlv3p_conv(y))
      y = self._activation(y)

      x = tf.image.resize(
          x, tf.shape(y)[1:3], method=tf.image.ResizeMethod.BILINEAR)
      x = tf.cast(x, dtype=y.dtype)
      x = tf.concat([x, y], axis=self._bn_axis)
    elif fusion == 'pyramid_fusion':
      if not isinstance(decoder_output, dict):
        raise ValueError('Only support dictionary decoder_output.')
      x = nn_layers.pyramid_feature_fusion(decoder_output,
                                           self._config_dict['level'])
    elif fusion == 'panoptic_fpn_fusion':
      x = self._panoptic_fpn_fusion(decoder_output)
    else:
      x = (decoder_output[str(self._config_dict['level'])]
           if isinstance(decoder_output, dict) else decoder_output)

    for conv, norm in zip(self._convs, self._norms):
      x = conv(x)
      x = norm(x)
      x = self._activation(x)
    if self._config_dict['upsample_factor'] > 1:
      x = spatial_transform_ops.nearest_upsampling(
          x, scale=self._config_dict['upsample_factor'])

    return self._classifier(x)

  def get_config(self):
    """Returns the layer configuration, merged with the base config."""
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(self._config_dict.items()))

  @classmethod
  def from_config(cls, config):
    """Creates a layer from its configuration."""
    return cls(**config)
official/vision/modeling/heads/segmentation_heads_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for segmentation_heads.py."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.modeling.heads
import
segmentation_heads
class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `segmentation_heads.SegmentationHead`."""

  @parameterized.parameters(
      (2, 'pyramid_fusion', None, None),
      (3, 'pyramid_fusion', None, None),
      (2, 'panoptic_fpn_fusion', 2, 5),
      (2, 'panoptic_fpn_fusion', 2, 6),
      (3, 'panoptic_fpn_fusion', 3, 5),
      (3, 'panoptic_fpn_fusion', 3, 6))
  def test_forward(self, level, feature_fusion, decoder_min_level,
                   decoder_max_level):
    """Checks the output mask shape for several fusion configurations."""
    backbone_features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
        '5': np.random.rand(2, 32, 32, 16),
    }
    decoder_features = {
        '3': np.random.rand(2, 128, 128, 64),
        '4': np.random.rand(2, 64, 64, 64),
        '5': np.random.rand(2, 32, 32, 64),
        '6': np.random.rand(2, 16, 16, 64),
    }

    if feature_fusion == 'panoptic_fpn_fusion':
      backbone_features['2'] = np.random.rand(2, 256, 256, 16)
      decoder_features['2'] = np.random.rand(2, 256, 256, 64)

    head = segmentation_heads.SegmentationHead(
        num_classes=10,
        level=level,
        feature_fusion=feature_fusion,
        decoder_min_level=decoder_min_level,
        decoder_max_level=decoder_max_level,
        num_decoder_filters=64)

    logits = head((backbone_features, decoder_features))

    # BUG FIX: the dict keys are strings, but `level` is an int; the original
    # `if level in decoder_features:` was always False, so the shape assertion
    # never ran. Compare against the string key instead.
    if str(level) in decoder_features:
      self.assertAllEqual(logits.numpy().shape, [
          2, decoder_features[str(level)].shape[1],
          decoder_features[str(level)].shape[2], 10
      ])

  def test_serialize_deserialize(self):
    """Round-trips the head through get_config/from_config."""
    head = segmentation_heads.SegmentationHead(num_classes=10, level=3)
    config = head.get_config()
    new_head = segmentation_heads.SegmentationHead.from_config(config)
    self.assertAllEqual(head.get_config(), new_head.get_config())
class MaskScoringHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `segmentation_heads.MaskScoring`."""

  @parameterized.parameters(
      (1, 1, 64, [4, 4]),
      (2, 1, 64, [4, 4]),
      (3, 1, 64, [4, 4]),
      (1, 2, 32, [8, 8]),
      (2, 2, 32, [8, 8]),
      (3, 2, 32, [8, 8]),)
  def test_forward(self, num_convs, num_fcs, num_filters, fc_input_size):
    """Checks the score shape across conv/fc depth configurations."""
    features = np.random.rand(2, 64, 64, 16)
    head = segmentation_heads.MaskScoring(
        num_classes=2,
        num_convs=num_convs,
        # BUG FIX: `num_fcs` was parameterized but never forwarded, so the
        # fc-depth half of the parameter grid exercised nothing.
        num_fcs=num_fcs,
        num_filters=num_filters,
        fc_dims=128,
        fc_input_size=fc_input_size)
    scores = head(features)
    self.assertAllEqual(scores.numpy().shape, [2, 2])

  def test_serialize_deserialize(self):
    """Round-trips the head through get_config/from_config."""
    head = segmentation_heads.MaskScoring(
        num_classes=2, fc_input_size=[4, 4], fc_dims=128)
    config = head.get_config()
    new_head = segmentation_heads.MaskScoring.from_config(config)
    self.assertAllEqual(head.get_config(), new_head.get_config())
# Standard TensorFlow test entry point.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/layers/__init__.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Layers package definition."""
from
official.vision.modeling.layers.box_sampler
import
BoxSampler
from
official.vision.modeling.layers.detection_generator
import
DetectionGenerator
from
official.vision.modeling.layers.detection_generator
import
MultilevelDetectionGenerator
from
official.vision.modeling.layers.mask_sampler
import
MaskSampler
from
official.vision.modeling.layers.nn_blocks
import
BottleneckBlock
from
official.vision.modeling.layers.nn_blocks
import
BottleneckResidualInner
from
official.vision.modeling.layers.nn_blocks
import
DepthwiseSeparableConvBlock
from
official.vision.modeling.layers.nn_blocks
import
InvertedBottleneckBlock
from
official.vision.modeling.layers.nn_blocks
import
ResidualBlock
from
official.vision.modeling.layers.nn_blocks
import
ResidualInner
from
official.vision.modeling.layers.nn_blocks
import
ReversibleLayer
from
official.vision.modeling.layers.nn_blocks_3d
import
BottleneckBlock3D
from
official.vision.modeling.layers.nn_blocks_3d
import
SelfGating
from
official.vision.modeling.layers.nn_layers
import
CausalConvMixin
from
official.vision.modeling.layers.nn_layers
import
Conv2D
from
official.vision.modeling.layers.nn_layers
import
Conv3D
from
official.vision.modeling.layers.nn_layers
import
DepthwiseConv2D
from
official.vision.modeling.layers.nn_layers
import
GlobalAveragePool3D
from
official.vision.modeling.layers.nn_layers
import
PositionalEncoding
from
official.vision.modeling.layers.nn_layers
import
Scale
from
official.vision.modeling.layers.nn_layers
import
SpatialAveragePool3D
from
official.vision.modeling.layers.nn_layers
import
SqueezeExcitation
from
official.vision.modeling.layers.nn_layers
import
StochasticDepth
from
official.vision.modeling.layers.nn_layers
import
TemporalSoftmaxPool
from
official.vision.modeling.layers.roi_aligner
import
MultilevelROIAligner
from
official.vision.modeling.layers.roi_generator
import
MultilevelROIGenerator
from
official.vision.modeling.layers.roi_sampler
import
ROISampler
official/vision/modeling/layers/box_sampler.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of box sampler."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.ops
import
sampling_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class BoxSampler(tf.keras.layers.Layer):
  """Samples a fixed-size, class-balanced subset of boxes per image."""

  def __init__(self,
               num_samples: int = 512,
               foreground_fraction: float = 0.25,
               **kwargs):
    """Initializes a box sampler.

    Args:
      num_samples: An `int` of the number of sampled boxes per image.
      foreground_fraction: A `float` in [0, 1], what percentage of boxes
        should be sampled from the positive examples.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'num_samples': num_samples,
        'foreground_fraction': foreground_fraction,
    }
    super().__init__(**kwargs)

  def call(self, positive_matches: tf.Tensor, negative_matches: tf.Tensor,
           ignored_matches: tf.Tensor):
    """Samples and selects positive and negative instances.

    Args:
      positive_matches: A `bool` tensor of shape [batch, N], `True` where the
        instance is a positive example.
      negative_matches: A `bool` tensor of shape [batch, N], `True` where the
        instance is a negative example.
      ignored_matches: A `bool` tensor of shape [batch, N], `True` where the
        instance should be excluded from sampling.

    Returns:
      A `tf.Tensor` of shape [batch_size, K] with the indices of the sampled
      examples, where K is `num_samples`.
    """
    # A box is a sampling candidate iff it is labeled (positive or negative)
    # and not explicitly ignored.
    candidates = tf.logical_and(
        tf.logical_or(positive_matches, negative_matches),
        tf.logical_not(ignored_matches))
    balanced_sampler = sampling_ops.BalancedPositiveNegativeSampler(
        positive_fraction=self._config_dict['foreground_fraction'],
        is_static=True)
    num_samples = self._config_dict['num_samples']
    # Subsample each image independently (requires a static batch size),
    # then stack the per-image indicator vectors back into a batch.
    indicators = tf.stack([
        balanced_sampler.subsample(candidates[b], num_samples,
                                   positive_matches[b])
        for b in range(candidates.shape[0])
    ])
    # top_k over the 0/1 indicators recovers the indices of the selected
    # boxes, sorted so selected entries come first.
    _, selected_indices = tf.nn.top_k(
        tf.cast(indicators, dtype=tf.int32), k=num_samples, sorted=True)
    return selected_indices

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
official/vision/modeling/layers/box_sampler_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for roi_sampler.py."""
# Import libraries
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.modeling.layers
import
box_sampler
class BoxSamplerTest(tf.test.TestCase):
  """Unit tests for box_sampler.BoxSampler."""

  def _assert_sample_width(self, selected_indices, expected):
    """Asserts the second (sample) dimension of the sampler output."""
    self.assertEqual(expected, tf.shape(selected_indices)[1])

  def test_box_sampler(self):
    fg_matches = np.array(
        [[True, False, False, False, True, True, False],
         [False, False, False, False, False, True, True]])
    bg_matches = np.array(
        [[False, True, True, True, False, False, False],
         [True, True, True, True, False, False, False]])
    skip_matches = np.array(
        [[False, False, False, False, False, False, True],
         [False, False, False, False, True, False, False]])
    sampler = box_sampler.BoxSampler(num_samples=2, foreground_fraction=0.5)

    # Runs on TPU.
    tpu_strategy = tf.distribute.TPUStrategy()
    with tpu_strategy.scope():
      tpu_indices = sampler(fg_matches, bg_matches, skip_matches)
    self._assert_sample_width(tpu_indices, 2)

    # Runs on CPU.
    cpu_indices = sampler(fg_matches, bg_matches, skip_matches)
    self._assert_sample_width(cpu_indices, 2)

  def test_serialize_deserialize(self):
    init_kwargs = dict(
        num_samples=512,
        foreground_fraction=0.25,
    )
    sampler = box_sampler.BoxSampler(**init_kwargs)
    # get_config must round-trip the constructor arguments exactly.
    self.assertEqual(sampler.get_config(), dict(init_kwargs))
    clone = box_sampler.BoxSampler.from_config(sampler.get_config())
    self.assertAllEqual(sampler.get_config(), clone.get_config())
# Standard TensorFlow test entry point.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/layers/deeplab.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Layers for DeepLabV3."""
import
tensorflow
as
tf
class SpatialPyramidPooling(tf.keras.layers.Layer):
  """Implements the Atrous Spatial Pyramid Pooling.

  References:
    [Rethinking Atrous Convolution for Semantic Image Segmentation](
      https://arxiv.org/pdf/1706.05587.pdf)
    [Encoder-Decoder with Atrous Separable Convolution for Semantic Image
      Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
  """

  def __init__(
      self,
      output_channels,
      dilation_rates,
      pool_kernel_size=None,
      use_sync_bn=False,
      batchnorm_momentum=0.99,
      batchnorm_epsilon=0.001,
      activation='relu',
      dropout=0.5,
      kernel_initializer='glorot_uniform',
      kernel_regularizer=None,
      interpolation='bilinear',
      use_depthwise_convolution=False,
      **kwargs):
    """Initializes `SpatialPyramidPooling`.

    Args:
      output_channels: Number of channels produced by SpatialPyramidPooling.
      dilation_rates: A list of integers for parallel dilated conv.
      pool_kernel_size: A list of integers or None. If None, global average
        pooling is applied, otherwise an average pooling of pool_kernel_size
        is applied.
      use_sync_bn: A bool, whether or not to use sync batch normalization.
      batchnorm_momentum: A float for the momentum in BatchNorm. Defaults to
        0.99.
      batchnorm_epsilon: A float for the epsilon value in BatchNorm. Defaults
        to 0.001.
      activation: A `str` for type of activation to be used. Defaults to
        'relu'.
      dropout: A float for the dropout rate before output. Defaults to 0.5.
      kernel_initializer: Kernel initializer for conv layers. Defaults to
        `glorot_uniform`.
      kernel_regularizer: Kernel regularizer for conv layers. Defaults to
        None.
      interpolation: The interpolation method for upsampling. Defaults to
        `bilinear`.
      use_depthwise_convolution: Allows spatial pooling to be separable
        depthwise convolusions. [Encoder-Decoder with Atrous Separable
        Convolution for Semantic Image Segmentation](
        https://arxiv.org/pdf/1802.02611.pdf)
      **kwargs: Other keyword arguments for the layer.
    """
    super(SpatialPyramidPooling, self).__init__(**kwargs)

    self.output_channels = output_channels
    self.dilation_rates = dilation_rates
    self.use_sync_bn = use_sync_bn
    self.batchnorm_momentum = batchnorm_momentum
    self.batchnorm_epsilon = batchnorm_epsilon
    self.activation = activation
    self.dropout = dropout
    self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self.interpolation = interpolation
    # A rank-4 input (NHWC) is required; a mismatched rank raises ValueError
    # at call time.
    self.input_spec = tf.keras.layers.InputSpec(ndim=4)
    self.pool_kernel_size = pool_kernel_size
    self.use_depthwise_convolution = use_depthwise_convolution

  def build(self, input_shape):
    height = input_shape[1]
    width = input_shape[2]
    channels = input_shape[3]

    self.aspp_layers = []

    if self.use_sync_bn:
      bn_op = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      bn_op = tf.keras.layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Branch 1: plain 1x1 conv + BN + activation.
    conv_sequential = tf.keras.Sequential([
        tf.keras.layers.Conv2D(
            filters=self.output_channels,
            kernel_size=(1, 1),
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            use_bias=False),
        bn_op(
            axis=bn_axis,
            momentum=self.batchnorm_momentum,
            epsilon=self.batchnorm_epsilon),
        tf.keras.layers.Activation(self.activation)
    ])
    self.aspp_layers.append(conv_sequential)

    # Branches 2..N: one (optionally depthwise-separable) atrous conv per
    # dilation rate.
    for dilation_rate in self.dilation_rates:
      leading_layers = []
      kernel_size = (3, 3)
      if self.use_depthwise_convolution:
        # Depthwise 3x3 followed by a pointwise 1x1 conv (separable atrous).
        leading_layers += [
            tf.keras.layers.DepthwiseConv2D(
                depth_multiplier=1,
                kernel_size=kernel_size,
                padding='same',
                depthwise_regularizer=self.kernel_regularizer,
                depthwise_initializer=self.kernel_initializer,
                dilation_rate=dilation_rate,
                use_bias=False)
        ]
        kernel_size = (1, 1)
      conv_sequential = tf.keras.Sequential(leading_layers + [
          tf.keras.layers.Conv2D(
              filters=self.output_channels,
              kernel_size=kernel_size,
              padding='same',
              kernel_regularizer=self.kernel_regularizer,
              kernel_initializer=self.kernel_initializer,
              dilation_rate=dilation_rate,
              use_bias=False),
          bn_op(
              axis=bn_axis,
              momentum=self.batchnorm_momentum,
              epsilon=self.batchnorm_epsilon),
          tf.keras.layers.Activation(self.activation)
      ])
      self.aspp_layers.append(conv_sequential)

    # Image-pooling branch: global (or windowed) average pooling, 1x1 conv,
    # then resize back to the input's spatial resolution.
    if self.pool_kernel_size is None:
      pool_sequential = tf.keras.Sequential([
          tf.keras.layers.GlobalAveragePooling2D(),
          tf.keras.layers.Reshape((1, 1, channels))
      ])
    else:
      pool_sequential = tf.keras.Sequential(
          [tf.keras.layers.AveragePooling2D(self.pool_kernel_size)])

    pool_sequential.add(
        tf.keras.Sequential([
            tf.keras.layers.Conv2D(
                filters=self.output_channels,
                kernel_size=(1, 1),
                kernel_initializer=self.kernel_initializer,
                kernel_regularizer=self.kernel_regularizer,
                use_bias=False),
            bn_op(
                axis=bn_axis,
                momentum=self.batchnorm_momentum,
                epsilon=self.batchnorm_epsilon),
            tf.keras.layers.Activation(self.activation),
            # Resize in float32 so the upsampling is numerically stable under
            # mixed precision.
            tf.keras.layers.experimental.preprocessing.Resizing(
                height,
                width,
                interpolation=self.interpolation,
                dtype=tf.float32)
        ]))

    self.aspp_layers.append(pool_sequential)

    # Fuses all branches: concat along channels, 1x1 conv, BN, activation,
    # dropout.
    self.projection = tf.keras.Sequential([
        tf.keras.layers.Conv2D(
            filters=self.output_channels,
            kernel_size=(1, 1),
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            use_bias=False),
        bn_op(
            axis=bn_axis,
            momentum=self.batchnorm_momentum,
            epsilon=self.batchnorm_epsilon),
        tf.keras.layers.Activation(self.activation),
        tf.keras.layers.Dropout(rate=self.dropout)
    ])

  def call(self, inputs, training=None):
    """Applies all ASPP branches and projects the concatenated result."""
    if training is None:
      training = tf.keras.backend.learning_phase()
    result = []
    for layer in self.aspp_layers:
      # Cast back to the input dtype since the pooling branch upsamples in
      # float32 regardless of the compute dtype.
      result.append(tf.cast(layer(inputs, training=training), inputs.dtype))
    result = tf.concat(result, axis=-1)
    result = self.projection(result, training=training)
    return result

  def get_config(self):
    config = {
        'output_channels': self.output_channels,
        'dilation_rates': self.dilation_rates,
        'pool_kernel_size': self.pool_kernel_size,
        'use_sync_bn': self.use_sync_bn,
        'batchnorm_momentum': self.batchnorm_momentum,
        'batchnorm_epsilon': self.batchnorm_epsilon,
        'activation': self.activation,
        'dropout': self.dropout,
        'kernel_initializer': tf.keras.initializers.serialize(
            self.kernel_initializer),
        'kernel_regularizer': tf.keras.regularizers.serialize(
            self.kernel_regularizer),
        'interpolation': self.interpolation,
        # Fix: this was previously missing, so from_config() silently reset
        # use_depthwise_convolution to its default on deserialization.
        'use_depthwise_convolution': self.use_depthwise_convolution,
    }
    base_config = super(SpatialPyramidPooling, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
official/vision/modeling/layers/deeplab_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for ASPP."""
import
tensorflow
as
tf
from
tensorflow.python.keras
import
keras_parameterized
from
official.vision.modeling.layers
import
deeplab
@keras_parameterized.run_all_keras_modes
class DeeplabTest(keras_parameterized.TestCase):
  """Tests for the ASPP (SpatialPyramidPooling) layer."""

  @keras_parameterized.parameterized.parameters(
      (None,),
      ([32, 32],),
  )
  def test_aspp(self, pool_kernel_size):
    """Checks the output shape for both global and windowed pooling."""
    inputs = tf.keras.Input(shape=(64, 64, 128), dtype=tf.float32)
    layer = deeplab.SpatialPyramidPooling(
        output_channels=256,
        dilation_rates=[6, 12, 18],
        # Fix: forward the parameterized value instead of hard-coding None,
        # so the [32, 32] case actually exercises the AveragePooling2D path.
        pool_kernel_size=pool_kernel_size)
    output = layer(inputs)
    self.assertAllEqual([None, 64, 64, 256], output.shape)

  def test_aspp_invalid_shape(self):
    """A rank-3 input must be rejected (the layer requires NHWC rank 4)."""
    inputs = tf.keras.Input(shape=(64, 64), dtype=tf.float32)
    layer = deeplab.SpatialPyramidPooling(
        output_channels=256, dilation_rates=[6, 12, 18])
    with self.assertRaises(ValueError):
      _ = layer(inputs)

  def test_config_with_custom_name(self):
    """The layer name must survive a get_config/from_config round trip."""
    layer = deeplab.SpatialPyramidPooling(256, [5], name='aspp')
    config = layer.get_config()
    layer_1 = deeplab.SpatialPyramidPooling.from_config(config)
    self.assertEqual(layer_1.name, layer.name)
# Standard TensorFlow test entry point.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/layers/detection_generator.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of generators to generate the final detections."""
import
contextlib
from
typing
import
List
,
Optional
,
Mapping
# Import libraries
import
tensorflow
as
tf
from
official.vision.ops
import
box_ops
from
official.vision.ops
import
nms
from
official.vision.ops
import
preprocess_ops
def _generate_detections_v1(boxes: tf.Tensor,
                            scores: tf.Tensor,
                            attributes: Optional[Mapping[str,
                                                         tf.Tensor]] = None,
                            pre_nms_top_k: int = 5000,
                            pre_nms_score_threshold: float = 0.05,
                            nms_iou_threshold: float = 0.5,
                            max_num_detections: int = 100,
                            soft_nms_sigma: Optional[float] = None):
  """Generates the final detections given the model outputs.

  The implementation unrolls the batch dimension and process images one by
  one. It required the batch dimension to be statically known and it is TPU
  compatible.

  Args:
    boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or
      `[batch_size, N, 1, 4]` for box predictions on all feature levels. The
      N is the number of total anchors on all levels.
    scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which
      stacks class probability on all feature levels. The N is the number of
      total anchors on all levels. The num_classes is the number of classes
      predicted by the model. Note that the class_outputs here is the raw
      score.
    attributes: None or a dict of (attribute_name, attributes) pairs. Each
      attributes is a `tf.Tensor` with shape
      `[batch_size, N, num_classes, attribute_size]` or
      `[batch_size, N, 1, attribute_size]` for attribute predictions on all
      feature levels. The N is the number of total anchors on all levels. Can
      be None if no attribute learning is required.
    pre_nms_top_k: An `int` number of top candidate detections per class
      before NMS.
    pre_nms_score_threshold: A `float` representing the threshold for
      deciding when to remove boxes based on score.
    nms_iou_threshold: A `float` representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    max_num_detections: A scalar representing maximum number of boxes
      retained over all classes.
    soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
      When soft_nms_sigma=0.0 (which is default), we fall back to standard
      NMS.

  Returns:
    nms_boxes: A `float` type `tf.Tensor` of shape
      `[batch_size, max_num_detections, 4]` representing top detected boxes
      in `[y1, x1, y2, x2]`.
    nms_scores: A `float` type `tf.Tensor` of shape
      `[batch_size, max_num_detections]` representing sorted confidence
      scores for detected boxes. The values are between `[0, 1]`.
    nms_classes: An `int` type `tf.Tensor` of shape
      `[batch_size, max_num_detections]` representing classes for detected
      boxes.
    valid_detections: An `int` type `tf.Tensor` of shape `[batch_size]` only
      the top `valid_detections` boxes are valid detections.
    nms_attributes: None or a dict of (attribute_name, attributes). Each
      attribute is a `float` type `tf.Tensor` of shape
      `[batch_size, max_num_detections, attribute_size]` representing
      attribute predictions for detected boxes. Can be an empty dict if no
      attribute learning is required.
  """
  with tf.name_scope('generate_detections'):
    # The batch dimension must be static because the loop below unrolls it
    # at graph-construction time (this keeps the op TPU-compatible).
    batch_size = scores.get_shape().as_list()[0]
    nmsed_boxes = []
    nmsed_classes = []
    nmsed_scores = []
    valid_detections = []
    if attributes:
      nmsed_attributes = {att_name: [] for att_name in attributes.keys()}
    else:
      nmsed_attributes = {}

    # Process one image at a time and collect per-image results.
    for i in range(batch_size):
      (nmsed_boxes_i, nmsed_scores_i, nmsed_classes_i, valid_detections_i,
       nmsed_att_i) = _generate_detections_per_image(
           boxes[i],
           scores[i],
           attributes={
               att_name: att[i] for att_name, att in attributes.items()
           } if attributes else {},
           pre_nms_top_k=pre_nms_top_k,
           pre_nms_score_threshold=pre_nms_score_threshold,
           nms_iou_threshold=nms_iou_threshold,
           max_num_detections=max_num_detections,
           soft_nms_sigma=soft_nms_sigma)
      nmsed_boxes.append(nmsed_boxes_i)
      nmsed_scores.append(nmsed_scores_i)
      nmsed_classes.append(nmsed_classes_i)
      valid_detections.append(valid_detections_i)
      if attributes:
        for att_name in attributes.keys():
          nmsed_attributes[att_name].append(nmsed_att_i[att_name])

  # Re-assemble the per-image results into batched tensors.
  nmsed_boxes = tf.stack(nmsed_boxes, axis=0)
  nmsed_scores = tf.stack(nmsed_scores, axis=0)
  nmsed_classes = tf.stack(nmsed_classes, axis=0)
  valid_detections = tf.stack(valid_detections, axis=0)
  if attributes:
    for att_name in attributes.keys():
      nmsed_attributes[att_name] = tf.stack(
          nmsed_attributes[att_name], axis=0)
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections, nmsed_attributes
def _generate_detections_per_image(
    boxes: tf.Tensor,
    scores: tf.Tensor,
    attributes: Optional[Mapping[str, tf.Tensor]] = None,
    pre_nms_top_k: int = 5000,
    pre_nms_score_threshold: float = 0.05,
    nms_iou_threshold: float = 0.5,
    max_num_detections: int = 100,
    soft_nms_sigma: Optional[float] = None):
  """Generates the final detections per image given the model outputs.

  Args:
    boxes: A `tf.Tensor` with shape `[N, num_classes, 4]` or `[N, 1, 4]`,
      which box predictions on all feature levels. The N is the number of
      total anchors on all levels.
    scores: A `tf.Tensor` with shape `[N, num_classes]`, which stacks class
      probability on all feature levels. The N is the number of total anchors
      on all levels. The num_classes is the number of classes predicted by
      the model. Note that the class_outputs here is the raw score.
    attributes: If not None, a dict of `tf.Tensor`. Each value is in shape
      `[N, num_classes, attribute_size]` or `[N, 1, attribute_size]` of
      attribute predictions on all feature levels. The N is the number of
      total anchors on all levels.
    pre_nms_top_k: An `int` number of top candidate detections per class
      before NMS.
    pre_nms_score_threshold: A `float` representing the threshold for
      deciding when to remove boxes based on score.
    nms_iou_threshold: A `float` representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    max_num_detections: A `scalar` representing maximum number of boxes
      retained over all classes.
    soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
      When soft_nms_sigma=0.0, we fall back to standard NMS.
      If set to None, `tf.image.non_max_suppression_padded` is called
      instead.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape `[max_num_detections, 4]`
      representing top detected boxes in `[y1, x1, y2, x2]`.
    nms_scores: A `float` tf.Tensor of shape `[max_num_detections]`
      representing sorted confidence scores for detected boxes. The values
      are between [0, 1].
    nms_classes: An `int` tf.Tensor of shape `[max_num_detections]`
      representing classes for detected boxes.
    valid_detections: An `int` tf.Tensor of shape [1] only the top
      `valid_detections` boxes are valid detections.
    nms_attributes: None or a dict. Each value is a `float` tf.Tensor of
      shape `[max_num_detections, attribute_size]` representing attribute
      predictions for detected boxes. Can be an empty dict if `attributes` is
      None.
  """
  nmsed_boxes = []
  nmsed_scores = []
  nmsed_classes = []
  # When boxes are class-agnostic (`[N, 1, 4]`), the same box column is
  # reused for every class via min(num_classes_for_box - 1, i).
  num_classes_for_box = boxes.get_shape().as_list()[1]
  num_classes = scores.get_shape().as_list()[1]
  if attributes:
    nmsed_attributes = {att_name: [] for att_name in attributes.keys()}
  else:
    nmsed_attributes = {}

  # Run NMS independently for each class.
  for i in range(num_classes):
    boxes_i = boxes[:, min(num_classes_for_box - 1, i)]
    scores_i = scores[:, i]
    # Obtains pre_nms_top_k before running NMS.
    scores_i, indices = tf.nn.top_k(
        scores_i, k=tf.minimum(tf.shape(scores_i)[-1], pre_nms_top_k))
    boxes_i = tf.gather(boxes_i, indices)

    if soft_nms_sigma is not None:
      # Soft-NMS path: returns a variable number of boxes, so pad/clip to
      # max_num_detections afterwards.
      (nmsed_indices_i,
       nmsed_scores_i) = tf.image.non_max_suppression_with_scores(
           tf.cast(boxes_i, tf.float32),
           tf.cast(scores_i, tf.float32),
           max_num_detections,
           iou_threshold=nms_iou_threshold,
           score_threshold=pre_nms_score_threshold,
           soft_nms_sigma=soft_nms_sigma,
           name='nms_detections_' + str(i))
      nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
      nmsed_boxes_i = preprocess_ops.clip_or_pad_to_fixed_size(
          nmsed_boxes_i, max_num_detections, 0.0)
      nmsed_scores_i = preprocess_ops.clip_or_pad_to_fixed_size(
          nmsed_scores_i, max_num_detections, -1.0)
    else:
      # Hard-NMS path: padded output of fixed size plus a valid count.
      (nmsed_indices_i,
       nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
           tf.cast(boxes_i, tf.float32),
           tf.cast(scores_i, tf.float32),
           max_num_detections,
           iou_threshold=nms_iou_threshold,
           score_threshold=pre_nms_score_threshold,
           pad_to_max_output_size=True,
           name='nms_detections_' + str(i))
      nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
      nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
      # Sets scores of invalid boxes to -1.
      nmsed_scores_i = tf.where(
          tf.less(tf.range(max_num_detections), [nmsed_num_valid_i]),
          nmsed_scores_i, -tf.ones_like(nmsed_scores_i))
    nmsed_classes_i = tf.fill([max_num_detections], i)
    nmsed_boxes.append(nmsed_boxes_i)
    nmsed_scores.append(nmsed_scores_i)
    nmsed_classes.append(nmsed_classes_i)
    if attributes:
      for att_name, att in attributes.items():
        # Attributes may also be class-agnostic; reuse the single column.
        num_classes_for_attr = att.get_shape().as_list()[1]
        att_i = att[:, min(num_classes_for_attr - 1, i)]
        att_i = tf.gather(att_i, indices)
        nmsed_att_i = tf.gather(att_i, nmsed_indices_i)
        nmsed_att_i = preprocess_ops.clip_or_pad_to_fixed_size(
            nmsed_att_i, max_num_detections, 0.0)
        nmsed_attributes[att_name].append(nmsed_att_i)

  # Concats results from all classes and sort them.
  nmsed_boxes = tf.concat(nmsed_boxes, axis=0)
  nmsed_scores = tf.concat(nmsed_scores, axis=0)
  nmsed_classes = tf.concat(nmsed_classes, axis=0)
  nmsed_scores, indices = tf.nn.top_k(
      nmsed_scores, k=max_num_detections, sorted=True)
  nmsed_boxes = tf.gather(nmsed_boxes, indices)
  nmsed_classes = tf.gather(nmsed_classes, indices)
  # Invalid entries were marked with score -1 above, so counting scores
  # strictly greater than -1 yields the number of valid detections.
  valid_detections = tf.reduce_sum(
      tf.cast(tf.greater(nmsed_scores, -1), tf.int32))
  if attributes:
    for att_name in attributes.keys():
      nmsed_attributes[att_name] = tf.concat(
          nmsed_attributes[att_name], axis=0)
      nmsed_attributes[att_name] = tf.gather(
          nmsed_attributes[att_name], indices)
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections, nmsed_attributes
def _select_top_k_scores(scores_in: tf.Tensor, pre_nms_num_detections: int):
  """Selects top_k scores and indices for each class.

  Args:
    scores_in: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which
      stacks class logit outputs on all feature levels. The N is the number
      of total anchors on all levels. The num_classes is the number of
      classes predicted by the model.
    pre_nms_num_detections: Number of candidates before NMS.

  Returns:
    scores and indices: A `tf.Tensor` with shape
      `[batch_size, pre_nms_num_detections, num_classes]`.
  """
  batch_size, num_anchors, num_classes = scores_in.get_shape().as_list()
  if batch_size is None:
    # Batch size is dynamic; read it from the runtime shape instead.
    batch_size = tf.shape(scores_in)[0]
  # Fold (batch, class) into one leading dimension so that a single top_k
  # call processes every class of every image at once.
  per_class_scores = tf.reshape(
      tf.transpose(scores_in, perm=[0, 2, 1]), [-1, num_anchors])
  top_scores, top_indices = tf.nn.top_k(
      per_class_scores, k=pre_nms_num_detections, sorted=True)
  unfolded_shape = [batch_size, num_classes, pre_nms_num_detections]
  top_scores = tf.reshape(top_scores, unfolded_shape)
  top_indices = tf.reshape(top_indices, unfolded_shape)
  # Restore the `[batch, pre_nms_num_detections, num_classes]` layout.
  return (tf.transpose(top_scores, [0, 2, 1]),
          tf.transpose(top_indices, [0, 2, 1]))
def _generate_detections_v2(boxes: tf.Tensor,
                            scores: tf.Tensor,
                            pre_nms_top_k: int = 5000,
                            pre_nms_score_threshold: float = 0.05,
                            nms_iou_threshold: float = 0.5,
                            max_num_detections: int = 100):
  """Generates the final detections given the model outputs.

  This implementation unrolls classes dimension while using the tf.while_loop
  to implement the batched NMS, so that it can be parallelized at the batch
  dimension. It should give better performance comparing to v1 implementation.
  It is TPU compatible.

  Args:
    boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or
      `[batch_size, N, 1, 4]`, which box predictions on all feature levels.
      The N is the number of total anchors on all levels.
    scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which
      stacks class probability on all feature levels. The N is the number of
      total anchors on all levels. The num_classes is the number of classes
      predicted by the model. Note that the class_outputs here is the raw
      score.
    pre_nms_top_k: An `int` number of top candidate detections per class
      before NMS.
    pre_nms_score_threshold: A `float` representing the threshold for
      deciding when to remove boxes based on score.
    nms_iou_threshold: A `float` representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    max_num_detections: A `scalar` representing maximum number of boxes
      retained over all classes.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape [batch_size, max_num_detections,
      4] representing top detected boxes in [y1, x1, y2, x2].
    nms_scores: A `float` tf.Tensor of shape [batch_size, max_num_detections]
      representing sorted confidence scores for detected boxes. The values
      are between [0, 1].
    nms_classes: An `int` tf.Tensor of shape [batch_size, max_num_detections]
      representing classes for detected boxes.
    valid_detections: An `int` tf.Tensor of shape [batch_size] only the top
      `valid_detections` boxes are valid detections.
  """
  with tf.name_scope('generate_detections'):
    nmsed_boxes = []
    nmsed_classes = []
    nmsed_scores = []
    valid_detections = []
    batch_size, _, num_classes_for_box, _ = boxes.get_shape().as_list()
    if batch_size is None:
      # Batch size is dynamic; read it from the runtime shape instead.
      batch_size = tf.shape(boxes)[0]
    _, total_anchors, num_classes = scores.get_shape().as_list()
    # Selects top pre_nms_num scores and indices before NMS.
    scores, indices = _select_top_k_scores(
        scores, min(total_anchors, pre_nms_top_k))
    # NMS is run once per class; class-agnostic boxes (`[..., 1, 4]`) reuse
    # the same box column for every class.
    for i in range(num_classes):
      boxes_i = boxes[:, :, min(num_classes_for_box - 1, i), :]
      scores_i = scores[:, :, i]
      # Obtains pre_nms_top_k before running NMS.
      boxes_i = tf.gather(boxes_i, indices[:, :, i], batch_dims=1, axis=1)

      # Filter out scores.
      boxes_i, scores_i = box_ops.filter_boxes_by_scores(
          boxes_i, scores_i, min_score_threshold=pre_nms_score_threshold)

      (nmsed_scores_i, nmsed_boxes_i) = nms.sorted_non_max_suppression_padded(
          tf.cast(scores_i, tf.float32),
          tf.cast(boxes_i, tf.float32),
          max_num_detections,
          iou_threshold=nms_iou_threshold)
      nmsed_classes_i = tf.fill([batch_size, max_num_detections], i)
      nmsed_boxes.append(nmsed_boxes_i)
      nmsed_scores.append(nmsed_scores_i)
      nmsed_classes.append(nmsed_classes_i)
  # Merge the per-class candidates and keep the overall top
  # max_num_detections by score.
  nmsed_boxes = tf.concat(nmsed_boxes, axis=1)
  nmsed_scores = tf.concat(nmsed_scores, axis=1)
  nmsed_classes = tf.concat(nmsed_classes, axis=1)
  nmsed_scores, indices = tf.nn.top_k(
      nmsed_scores, k=max_num_detections, sorted=True)
  nmsed_boxes = tf.gather(nmsed_boxes, indices, batch_dims=1, axis=1)
  nmsed_classes = tf.gather(nmsed_classes, indices, batch_dims=1)
  # Filtered-out entries carry score 0, so positive scores count as valid.
  valid_detections = tf.reduce_sum(
      input_tensor=tf.cast(tf.greater(nmsed_scores, 0.0), tf.int32), axis=1)
  return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def _generate_detections_batched(boxes: tf.Tensor, scores: tf.Tensor,
                                 pre_nms_score_threshold: float,
                                 nms_iou_threshold: float,
                                 max_num_detections: int):
  """Generates detected boxes with scores and classes for one-stage detector.

  The function takes output of multi-level ConvNets and anchor boxes and
  generates detected boxes. Note that this used batched nms, which is not
  supported on TPU currently.

  Args:
    boxes: A `tf.Tensor` with shape `[batch_size, N, num_classes, 4]` or
      `[batch_size, N, 1, 4]`, which box predictions on all feature levels.
      The N is the number of total anchors on all levels.
    scores: A `tf.Tensor` with shape `[batch_size, N, num_classes]`, which
      stacks class probability on all feature levels. The N is the number of
      total anchors on all levels. The num_classes is the number of classes
      predicted by the model. Note that the class_outputs here is the raw
      score.
    pre_nms_score_threshold: A `float` representing the threshold for
      deciding when to remove boxes based on score.
    nms_iou_threshold: A `float` representing the threshold for deciding
      whether boxes overlap too much with respect to IOU.
    max_num_detections: A `scalar` representing maximum number of boxes
      retained over all classes.

  Returns:
    nms_boxes: A `float` tf.Tensor of shape [batch_size, max_num_detections,
      4] representing top detected boxes in [y1, x1, y2, x2].
    nms_scores: A `float` tf.Tensor of shape [batch_size, max_num_detections]
      representing sorted confidence scores for detected boxes. The values
      are between [0, 1].
    nms_classes: An `int` tf.Tensor of shape [batch_size, max_num_detections]
      representing classes for detected boxes.
    valid_detections: An `int` tf.Tensor of shape [batch_size] only the top
      `valid_detections` boxes are valid detections.
  """
  with tf.name_scope('generate_detections'):
    # Delegate the entire batch to TF's fused batched-NMS kernel.
    out_boxes, out_scores, out_classes, out_valid = (
        tf.image.combined_non_max_suppression(
            boxes,
            scores,
            max_output_size_per_class=max_num_detections,
            max_total_size=max_num_detections,
            iou_threshold=nms_iou_threshold,
            score_threshold=pre_nms_score_threshold,
            pad_per_class=False,
            clip_boxes=False))
    # The fused op emits float class ids; downstream consumers expect int32.
    out_classes = tf.cast(out_classes, tf.int32)
    return out_boxes, out_scores, out_classes, out_valid
@tf.keras.utils.register_keras_serializable(package='Vision')
class DetectionGenerator(tf.keras.layers.Layer):
  """Generates the final detected boxes with scores and classes."""

  def __init__(self,
               apply_nms: bool = True,
               pre_nms_top_k: int = 5000,
               pre_nms_score_threshold: float = 0.05,
               nms_iou_threshold: float = 0.5,
               max_num_detections: int = 100,
               nms_version: str = 'v2',
               use_cpu_nms: bool = False,
               soft_nms_sigma: Optional[float] = None,
               **kwargs):
    """Initializes a detection generator.

    Args:
      apply_nms: A `bool` of whether or not apply non maximum suppression.
        If False, the decoded boxes and their scores are returned.
      pre_nms_top_k: An `int` of the number of top scores proposals to be kept
        before applying NMS.
      pre_nms_score_threshold: A `float` of the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are
        thrown away.
      nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
      max_num_detections: An `int` of the final number of total detections to
        generate.
      nms_version: A string of `batched`, `v1` or `v2` specifies NMS version.
      use_cpu_nms: A `bool` of whether or not enforce NMS to run on CPU.
      soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
        When soft_nms_sigma=0.0, we fall back to standard NMS.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    # The config dict doubles as the layer's serialization payload in
    # `get_config`, so it stores the constructor arguments verbatim.
    self._config_dict = {
        'apply_nms': apply_nms,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
        'soft_nms_sigma': soft_nms_sigma,
    }
    super(DetectionGenerator, self).__init__(**kwargs)

  def __call__(self,
               raw_boxes: tf.Tensor,
               raw_scores: tf.Tensor,
               anchor_boxes: tf.Tensor,
               image_shape: tf.Tensor,
               regression_weights: Optional[List[float]] = None,
               bbox_per_class: bool = True):
    """Generates final detections.

    Args:
      raw_boxes: A `tf.Tensor` of shape of `[batch_size, K, num_classes * 4]`
        representing the class-specific box coordinates relative to anchors.
      raw_scores: A `tf.Tensor` of shape of `[batch_size, K, num_classes]`
        representing the class logits before applying score activation.
      anchor_boxes: A `tf.Tensor` of shape of `[batch_size, K, 4]` representing
        the corresponding anchor boxes w.r.t `box_outputs`.
      image_shape: A `tf.Tensor` of shape of `[batch_size, 2]` storing the image
        height and width w.r.t. the scaled image, i.e. the same image space as
        `box_outputs` and `anchor_boxes`.
      regression_weights: A list of four float numbers to scale coordinates.
      bbox_per_class: A `bool`. If True, perform per-class box regression.

    Returns:
      If `apply_nms` = True, the return is a dictionary with keys:
        `detection_boxes`: A `float` tf.Tensor of shape
          [batch, max_num_detections, 4] representing top detected boxes in
          [y1, x1, y2, x2].
        `detection_scores`: A `float` `tf.Tensor` of shape
          [batch, max_num_detections] representing sorted confidence scores for
          detected boxes. The values are between [0, 1].
        `detection_classes`: An `int` tf.Tensor of shape
          [batch, max_num_detections] representing classes for detected boxes.
        `num_detections`: An `int` tf.Tensor of shape [batch] only the first
          `num_detections` boxes are valid detections
      If `apply_nms` = False, the return is a dictionary with keys:
        `decoded_boxes`: A `float` tf.Tensor of shape [batch, num_raw_boxes, 4]
          representing all the decoded boxes.
        `decoded_box_scores`: A `float` tf.Tensor of shape
          [batch, num_raw_boxes] representing scores of all the decoded boxes.
    """
    box_scores = tf.nn.softmax(raw_scores, axis=-1)

    # Removes the background class (index 0 on the class axis).
    box_scores_shape = tf.shape(box_scores)
    box_scores_shape_list = box_scores.get_shape().as_list()
    batch_size = box_scores_shape[0]
    num_locations = box_scores_shape_list[1]
    num_classes = box_scores_shape_list[-1]

    box_scores = tf.slice(box_scores, [0, 0, 1], [-1, -1, -1])

    if bbox_per_class:
      # Drop the background class's box predictions as well, then flatten the
      # per-class boxes (and matching tiled anchors) into a single axis of
      # `num_locations * (num_classes - 1)` candidates for decoding.
      num_detections = num_locations * (num_classes - 1)
      raw_boxes = tf.reshape(raw_boxes,
                             [batch_size, num_locations, num_classes, 4])
      raw_boxes = tf.slice(raw_boxes, [0, 0, 1, 0], [-1, -1, -1, -1])
      anchor_boxes = tf.tile(
          tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
      raw_boxes = tf.reshape(raw_boxes, [batch_size, num_detections, 4])
      anchor_boxes = tf.reshape(anchor_boxes, [batch_size, num_detections, 4])

    # Box decoding.
    decoded_boxes = box_ops.decode_boxes(
        raw_boxes, anchor_boxes, weights=regression_weights)

    # Box clipping to the scaled image boundary.
    decoded_boxes = box_ops.clip_boxes(
        decoded_boxes, tf.expand_dims(image_shape, axis=1))

    if bbox_per_class:
      decoded_boxes = tf.reshape(
          decoded_boxes, [batch_size, num_locations, num_classes - 1, 4])
    else:
      # Class-agnostic boxes still need a (singleton) class axis so the NMS
      # implementations see a consistent rank-4 input.
      decoded_boxes = tf.expand_dims(decoded_boxes, axis=2)

    if not self._config_dict['apply_nms']:
      return {
          'decoded_boxes': decoded_boxes,
          'decoded_box_scores': box_scores,
      }

    # Optionally force the NMS to be run on CPU.
    if self._config_dict['use_cpu_nms']:
      nms_context = tf.device('cpu:0')
    else:
      nms_context = contextlib.nullcontext()

    with nms_context:
      if self._config_dict['nms_version'] == 'batched':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_batched(
                decoded_boxes, box_scores,
                self._config_dict['pre_nms_score_threshold'],
                self._config_dict['nms_iou_threshold'],
                self._config_dict['max_num_detections']))
      elif self._config_dict['nms_version'] == 'v1':
        # v1 also returns per-detection indices (unused here, hence `_`).
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections, _) = (
            _generate_detections_v1(
                decoded_boxes,
                box_scores,
                pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                pre_nms_score_threshold=self
                ._config_dict['pre_nms_score_threshold'],
                nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                max_num_detections=self._config_dict['max_num_detections'],
                soft_nms_sigma=self._config_dict['soft_nms_sigma']))
      elif self._config_dict['nms_version'] == 'v2':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_v2(
                decoded_boxes,
                box_scores,
                pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                pre_nms_score_threshold=self
                ._config_dict['pre_nms_score_threshold'],
                nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                max_num_detections=self._config_dict['max_num_detections']))
      else:
        raise ValueError('NMS version {} not supported.'.format(
            self._config_dict['nms_version']))

    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return {
        'num_detections': valid_detections,
        'detection_boxes': nmsed_boxes,
        'detection_classes': nmsed_classes,
        'detection_scores': nmsed_scores,
    }

  def get_config(self):
    """Returns the layer config, i.e. the constructor arguments."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Rebuilds the layer from a config produced by `get_config`."""
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Vision')
class MultilevelDetectionGenerator(tf.keras.layers.Layer):
  """Generates detected boxes with scores and classes for one-stage detector."""

  def __init__(self,
               apply_nms: bool = True,
               pre_nms_top_k: int = 5000,
               pre_nms_score_threshold: float = 0.05,
               nms_iou_threshold: float = 0.5,
               max_num_detections: int = 100,
               nms_version: str = 'v1',
               use_cpu_nms: bool = False,
               soft_nms_sigma: Optional[float] = None,
               **kwargs):
    """Initializes a multi-level detection generator.

    Args:
      apply_nms: A `bool` of whether or not apply non maximum suppression. If
        False, the decoded boxes and their scores are returned.
      pre_nms_top_k: An `int` of the number of top scores proposals to be kept
        before applying NMS.
      pre_nms_score_threshold: A `float` of the score threshold to apply before
        applying NMS. Proposals whose scores are below this threshold are thrown
        away.
      nms_iou_threshold: A `float` in [0, 1], the NMS IoU threshold.
      max_num_detections: An `int` of the final number of total detections to
        generate.
      nms_version: A string of `batched`, `v1` or `v2` specifies NMS version
      use_cpu_nms: A `bool` of whether or not enforce NMS to run on CPU.
      soft_nms_sigma: A `float` representing the sigma parameter for Soft NMS.
        When soft_nms_sigma=0.0, we fall back to standard NMS.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    # The config dict doubles as the serialization payload for `get_config`.
    self._config_dict = {
        'apply_nms': apply_nms,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': nms_iou_threshold,
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
        'soft_nms_sigma': soft_nms_sigma,
    }
    super(MultilevelDetectionGenerator, self).__init__(**kwargs)

  def _decode_multilevel_outputs(
      self,
      raw_boxes: Mapping[str, tf.Tensor],
      raw_scores: Mapping[str, tf.Tensor],
      anchor_boxes: tf.Tensor,
      image_shape: tf.Tensor,
      raw_attributes: Optional[Mapping[str, tf.Tensor]] = None):
    """Collects dict of multilevel boxes, scores, attributes into lists."""
    boxes = []
    scores = []
    if raw_attributes:
      attributes = {att_name: [] for att_name in raw_attributes.keys()}
    else:
      attributes = {}

    # Level keys are stringified integers (e.g. '3'..'7'); iterate them in
    # numeric order so outputs concatenate deterministically.
    levels = list(raw_boxes.keys())
    min_level = int(min(levels))
    max_level = int(max(levels))
    for i in range(min_level, max_level + 1):
      raw_boxes_i = raw_boxes[str(i)]
      raw_scores_i = raw_scores[str(i)]
      batch_size = tf.shape(raw_boxes_i)[0]
      (_, feature_h_i, feature_w_i,
       num_anchors_per_locations_times_4) = raw_boxes_i.get_shape().as_list()
      num_locations = feature_h_i * feature_w_i
      # The last feature axis packs 4 coordinates for each anchor at a
      # location; the score tensor packs `num_classes` per anchor.
      num_anchors_per_locations = num_anchors_per_locations_times_4 // 4
      num_classes = raw_scores_i.get_shape().as_list(
      )[-1] // num_anchors_per_locations

      # Applies score transformation and remove the implicit background class.
      scores_i = tf.sigmoid(
          tf.reshape(raw_scores_i, [
              batch_size, num_locations * num_anchors_per_locations, num_classes
          ]))
      scores_i = tf.slice(scores_i, [0, 0, 1], [-1, -1, -1])

      # Box decoding.
      # The anchor boxes are shared for all data in a batch.
      # One stage detector only supports class agnostic box regression.
      anchor_boxes_i = tf.reshape(
          anchor_boxes[str(i)],
          [batch_size, num_locations * num_anchors_per_locations, 4])
      raw_boxes_i = tf.reshape(
          raw_boxes_i,
          [batch_size, num_locations * num_anchors_per_locations, 4])
      boxes_i = box_ops.decode_boxes(raw_boxes_i, anchor_boxes_i)

      # Box clipping.
      boxes_i = box_ops.clip_boxes(
          boxes_i, tf.expand_dims(image_shape, axis=1))

      boxes.append(boxes_i)
      scores.append(scores_i)

      if raw_attributes:
        for att_name, raw_att in raw_attributes.items():
          attribute_size = raw_att[str(
              i)].get_shape().as_list()[-1] // num_anchors_per_locations
          att_i = tf.reshape(raw_att[str(i)], [
              batch_size, num_locations * num_anchors_per_locations,
              attribute_size
          ])
          attributes[att_name].append(att_i)

    boxes = tf.concat(boxes, axis=1)
    # Insert a singleton class axis: box regression is class-agnostic here but
    # the NMS implementations expect a rank-4 `[batch, N, classes, 4]` input.
    boxes = tf.expand_dims(boxes, axis=2)
    scores = tf.concat(scores, axis=1)

    if raw_attributes:
      for att_name in raw_attributes.keys():
        attributes[att_name] = tf.concat(attributes[att_name], axis=1)
        attributes[att_name] = tf.expand_dims(attributes[att_name], axis=2)

    return boxes, scores, attributes

  def __call__(self,
               raw_boxes: Mapping[str, tf.Tensor],
               raw_scores: Mapping[str, tf.Tensor],
               anchor_boxes: tf.Tensor,
               image_shape: tf.Tensor,
               raw_attributes: Optional[Mapping[str, tf.Tensor]] = None):
    """Generates final detections.

    Args:
      raw_boxes: A `dict` with keys representing FPN levels and values
        representing box tenors of shape `[batch, feature_h, feature_w,
        num_anchors * 4]`.
      raw_scores: A `dict` with keys representing FPN levels and values
        representing logit tensors of shape `[batch, feature_h, feature_w,
        num_anchors]`.
      anchor_boxes: A `tf.Tensor` of shape of [batch_size, K, 4] representing
        the corresponding anchor boxes w.r.t `box_outputs`.
      image_shape: A `tf.Tensor` of shape of [batch_size, 2] storing the image
        height and width w.r.t. the scaled image, i.e. the same image space as
        `box_outputs` and `anchor_boxes`.
      raw_attributes: If not None, a `dict` of (attribute_name,
        attribute_prediction) pairs. `attribute_prediction` is a dict that
        contains keys representing FPN levels and values representing tenors of
        shape `[batch, feature_h, feature_w, num_anchors * attribute_size]`.

    Returns:
      If `apply_nms` = True, the return is a dictionary with keys:
        `detection_boxes`: A `float` tf.Tensor of shape
          [batch, max_num_detections, 4] representing top detected boxes in
          [y1, x1, y2, x2].
        `detection_scores`: A `float` tf.Tensor of shape
          [batch, max_num_detections] representing sorted confidence scores for
          detected boxes. The values are between [0, 1].
        `detection_classes`: An `int` tf.Tensor of shape
          [batch, max_num_detections] representing classes for detected boxes.
        `num_detections`: An `int` tf.Tensor of shape [batch] only the first
          `num_detections` boxes are valid detections
        `detection_attributes`: A dict. Values of the dict is a `float`
          tf.Tensor of shape [batch, max_num_detections, attribute_size]
          representing attribute predictions for detected boxes.
      If `apply_nms` = False, the return is a dictionary with keys:
        `decoded_boxes`: A `float` tf.Tensor of shape [batch, num_raw_boxes, 4]
          representing all the decoded boxes.
        `decoded_box_scores`: A `float` tf.Tensor of shape
          [batch, num_raw_boxes] representing scores of all the decoded boxes.
        `decoded_box_attributes`: A dict. Values in the dict is a
          `float` tf.Tensor of shape [batch, num_raw_boxes, attribute_size]
          representing attribute predictions of all the decoded boxes.
    """
    boxes, scores, attributes = self._decode_multilevel_outputs(
        raw_boxes, raw_scores, anchor_boxes, image_shape, raw_attributes)

    if not self._config_dict['apply_nms']:
      return {
          'decoded_boxes': boxes,
          'decoded_box_scores': scores,
          'decoded_box_attributes': attributes,
      }

    # Optionally force the NMS to run on CPU.
    if self._config_dict['use_cpu_nms']:
      nms_context = tf.device('cpu:0')
    else:
      nms_context = contextlib.nullcontext()

    with nms_context:
      if raw_attributes and (self._config_dict['nms_version'] != 'v1'):
        # Only NMSv1 propagates the attribute tensors through suppression.
        raise ValueError(
            'Attribute learning is only supported for NMSv1 but NMS {} is used.'
            .format(self._config_dict['nms_version']))
      if self._config_dict['nms_version'] == 'batched':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_batched(
                boxes, scores, self._config_dict['pre_nms_score_threshold'],
                self._config_dict['nms_iou_threshold'],
                self._config_dict['max_num_detections']))
        # Set `nmsed_attributes` to an empty dict for batched NMS, which does
        # not support attribute propagation.
        nmsed_attributes = {}
      elif self._config_dict['nms_version'] == 'v1':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections,
         nmsed_attributes) = (
             _generate_detections_v1(
                 boxes,
                 scores,
                 attributes=attributes if raw_attributes else None,
                 pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                 pre_nms_score_threshold=self
                 ._config_dict['pre_nms_score_threshold'],
                 nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                 max_num_detections=self._config_dict['max_num_detections'],
                 soft_nms_sigma=self._config_dict['soft_nms_sigma']))
      elif self._config_dict['nms_version'] == 'v2':
        (nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections) = (
            _generate_detections_v2(
                boxes,
                scores,
                pre_nms_top_k=self._config_dict['pre_nms_top_k'],
                pre_nms_score_threshold=self
                ._config_dict['pre_nms_score_threshold'],
                nms_iou_threshold=self._config_dict['nms_iou_threshold'],
                max_num_detections=self._config_dict['max_num_detections']))
        # Set `nmsed_attributes` to an empty dict for v2, which does not
        # support attribute propagation.
        nmsed_attributes = {}
      else:
        raise ValueError('NMS version {} not supported.'.format(
            self._config_dict['nms_version']))

    # Adds 1 to offset the background class which has index 0.
    nmsed_classes += 1

    return {
        'num_detections': valid_detections,
        'detection_boxes': nmsed_boxes,
        'detection_classes': nmsed_classes,
        'detection_scores': nmsed_scores,
        'detection_attributes': nmsed_attributes,
    }

  def get_config(self):
    """Returns the layer config, i.e. the constructor arguments."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Rebuilds the layer from a config produced by `get_config`."""
    return cls(**config)
official/vision/modeling/layers/detection_generator_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for detection_generator.py."""
# Import libraries
from
absl.testing
import
parameterized
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.modeling.layers
import
detection_generator
from
official.vision.ops
import
anchor
class SelectTopKScoresTest(tf.test.TestCase):
  """Unit test for `detection_generator._select_top_k_scores`."""

  def testSelectTopKScores(self):
    # One batch of four locations, two classes per location.
    raw_scores = tf.constant(
        [[[0.2, 0.2], [0.1, 0.9], [0.5, 0.1], [0.3, 0.5]]], dtype=tf.float32)

    selected_scores, selected_indices = (
        detection_generator._select_top_k_scores(
            raw_scores, pre_nms_num_detections=2))

    # Per class, the two best scores come from locations (2, 3) and (1, 3).
    self.assertAllEqual(
        selected_scores.numpy(),
        np.array([[[0.5, 0.9], [0.3, 0.5]]], dtype=np.float32))
    self.assertAllEqual(selected_indices.numpy(), [[[2, 1], [3, 3]]])
class DetectionGeneratorTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for `detection_generator.DetectionGenerator`."""

  @parameterized.product(
      nms_version=['batched', 'v1', 'v2'],
      use_cpu_nms=[True, False],
      soft_nms_sigma=[None, 0.1])
  def testDetectionsOutputShape(self, nms_version, use_cpu_nms,
                                soft_nms_sigma):
    """Checks output shapes for every NMS variant / device / sigma combo."""
    max_num_detections = 10
    num_classes = 4
    pre_nms_top_k = 5000
    pre_nms_score_threshold = 0.01
    batch_size = 1
    kwargs = {
        'apply_nms': True,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': 0.5,
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
        'soft_nms_sigma': soft_nms_sigma,
    }
    generator = detection_generator.DetectionGenerator(**kwargs)

    # Random logits centered on zero; 84 anchors total.
    cls_outputs_all = (np.random.rand(84, num_classes) - 0.5) * 3  # random 84x3 outputs.
    box_outputs_all = np.random.rand(84, 4 * num_classes)  # random 84 boxes.
    anchor_boxes_all = np.random.rand(84, 4)  # random 84 boxes.
    class_outputs = tf.reshape(
        tf.convert_to_tensor(cls_outputs_all, dtype=tf.float32),
        [1, 84, num_classes])
    box_outputs = tf.reshape(
        tf.convert_to_tensor(box_outputs_all, dtype=tf.float32),
        [1, 84, 4 * num_classes])
    anchor_boxes = tf.reshape(
        tf.convert_to_tensor(anchor_boxes_all, dtype=tf.float32),
        [1, 84, 4])
    # image_info rows: original size, scaled size, scale, offset; the
    # generator receives only the scaled size (row 1).
    image_info = tf.constant(
        [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)
    results = generator(box_outputs, class_outputs, anchor_boxes,
                        image_info[:, 1, :])
    boxes = results['detection_boxes']
    classes = results['detection_classes']
    scores = results['detection_scores']
    valid_detections = results['num_detections']

    self.assertEqual(boxes.numpy().shape, (batch_size, max_num_detections, 4))
    self.assertEqual(scores.numpy().shape, (batch_size, max_num_detections,))
    self.assertEqual(classes.numpy().shape, (batch_size, max_num_detections,))
    self.assertEqual(valid_detections.numpy().shape, (batch_size,))

  def test_serialize_deserialize(self):
    """Round-trips the layer through get_config/from_config."""
    kwargs = {
        'apply_nms': True,
        'pre_nms_top_k': 1000,
        'pre_nms_score_threshold': 0.1,
        'nms_iou_threshold': 0.5,
        'max_num_detections': 10,
        'nms_version': 'v2',
        'use_cpu_nms': False,
        'soft_nms_sigma': None,
    }
    generator = detection_generator.DetectionGenerator(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(generator.get_config(), expected_config)

    new_generator = (
        detection_generator.DetectionGenerator.from_config(
            generator.get_config()))
    self.assertAllEqual(generator.get_config(), new_generator.get_config())
class MultilevelDetectionGeneratorTest(parameterized.TestCase,
                                       tf.test.TestCase):
  """Tests for `detection_generator.MultilevelDetectionGenerator`."""

  @parameterized.parameters(
      # (nms_version, has_att_heads, use_cpu_nms, soft_nms_sigma).
      # Attribute heads are only exercised with 'v1': the other NMS versions
      # reject attributes by design.
      ('batched', False, True, None),
      ('batched', False, False, None),
      ('v2', False, True, None),
      ('v2', False, False, None),
      ('v1', True, True, 0.0),
      ('v1', True, False, 0.1),
      ('v1', True, False, None),
  )
  def testDetectionsOutputShape(self, nms_version, has_att_heads,
                                use_cpu_nms, soft_nms_sigma):
    """Checks output shapes across NMS versions and attribute-head configs."""
    min_level = 4
    max_level = 6
    num_scales = 2
    max_num_detections = 10
    aspect_ratios = [1.0, 2.0]
    anchor_scale = 2.0
    output_size = [64, 64]
    num_classes = 4
    pre_nms_top_k = 5000
    pre_nms_score_threshold = 0.01
    batch_size = 1
    kwargs = {
        'apply_nms': True,
        'pre_nms_top_k': pre_nms_top_k,
        'pre_nms_score_threshold': pre_nms_score_threshold,
        'nms_iou_threshold': 0.5,
        'max_num_detections': max_num_detections,
        'nms_version': nms_version,
        'use_cpu_nms': use_cpu_nms,
        'soft_nms_sigma': soft_nms_sigma,
    }

    input_anchor = anchor.build_anchor_generator(min_level, max_level,
                                                 num_scales, aspect_ratios,
                                                 anchor_scale)
    anchor_boxes = input_anchor(output_size)
    # 84 locations total: 64 at level 4 (8x8), 16 at level 5 (4x4), 4 at
    # level 6 (2x2); the slices below split them per level.
    cls_outputs_all = (np.random.rand(84, num_classes) - 0.5) * 3  # random 84x3 outputs.
    box_outputs_all = np.random.rand(84, 4)  # random 84 boxes.
    class_outputs = {
        '4':
            tf.reshape(
                tf.convert_to_tensor(cls_outputs_all[0:64], dtype=tf.float32),
                [1, 8, 8, num_classes]),
        '5':
            tf.reshape(
                tf.convert_to_tensor(cls_outputs_all[64:80], dtype=tf.float32),
                [1, 4, 4, num_classes]),
        '6':
            tf.reshape(
                tf.convert_to_tensor(cls_outputs_all[80:84], dtype=tf.float32),
                [1, 2, 2, num_classes]),
    }
    box_outputs = {
        '4':
            tf.reshape(
                tf.convert_to_tensor(box_outputs_all[0:64], dtype=tf.float32),
                [1, 8, 8, 4]),
        '5':
            tf.reshape(
                tf.convert_to_tensor(box_outputs_all[64:80], dtype=tf.float32),
                [1, 4, 4, 4]),
        '6':
            tf.reshape(
                tf.convert_to_tensor(box_outputs_all[80:84], dtype=tf.float32),
                [1, 2, 2, 4]),
    }
    if has_att_heads:
      att_outputs_all = np.random.rand(84, 1)  # random attributes.
      att_outputs = {
          'depth': {
              '4':
                  tf.reshape(
                      tf.convert_to_tensor(
                          att_outputs_all[0:64], dtype=tf.float32),
                      [1, 8, 8, 1]),
              '5':
                  tf.reshape(
                      tf.convert_to_tensor(
                          att_outputs_all[64:80], dtype=tf.float32),
                      [1, 4, 4, 1]),
              '6':
                  tf.reshape(
                      tf.convert_to_tensor(
                          att_outputs_all[80:84], dtype=tf.float32),
                      [1, 2, 2, 1]),
          }
      }
    else:
      att_outputs = None
    # image_info rows: original size, scaled size, scale, offset; the
    # generator receives only the scaled size (row 1).
    image_info = tf.constant([[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]],
                             dtype=tf.float32)
    generator = detection_generator.MultilevelDetectionGenerator(**kwargs)
    results = generator(box_outputs, class_outputs, anchor_boxes,
                        image_info[:, 1, :], att_outputs)
    boxes = results['detection_boxes']
    classes = results['detection_classes']
    scores = results['detection_scores']
    valid_detections = results['num_detections']

    self.assertEqual(boxes.numpy().shape, (batch_size, max_num_detections, 4))
    self.assertEqual(scores.numpy().shape, (batch_size, max_num_detections,))
    self.assertEqual(classes.numpy().shape, (batch_size, max_num_detections,))
    self.assertEqual(valid_detections.numpy().shape, (batch_size,))
    if has_att_heads:
      for att in results['detection_attributes'].values():
        self.assertEqual(att.numpy().shape,
                         (batch_size, max_num_detections, 1))

  def test_serialize_deserialize(self):
    """Round-trips the layer through get_config/from_config."""
    kwargs = {
        'apply_nms': True,
        'pre_nms_top_k': 1000,
        'pre_nms_score_threshold': 0.1,
        'nms_iou_threshold': 0.5,
        'max_num_detections': 10,
        'nms_version': 'v2',
        'use_cpu_nms': False,
        'soft_nms_sigma': None,
    }
    generator = detection_generator.MultilevelDetectionGenerator(**kwargs)

    expected_config = dict(kwargs)
    self.assertEqual(generator.get_config(), expected_config)

    new_generator = (
        detection_generator.MultilevelDetectionGenerator.from_config(
            generator.get_config()))
    self.assertAllEqual(generator.get_config(), new_generator.get_config())
if __name__ == '__main__':
  # Runs all test cases in this module under the TF test runner.
  tf.test.main()
official/vision/modeling/layers/mask_sampler.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of mask sampler."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.ops
import
spatial_transform_ops
def _sample_and_crop_foreground_masks(candidate_rois: tf.Tensor,
                                      candidate_gt_boxes: tf.Tensor,
                                      candidate_gt_classes: tf.Tensor,
                                      candidate_gt_indices: tf.Tensor,
                                      gt_masks: tf.Tensor,
                                      num_sampled_masks: int = 128,
                                      mask_target_size: int = 28):
  """Samples and creates cropped foreground masks for training.

  Args:
    candidate_rois: A `tf.Tensor` of shape of [batch_size, N, 4], where N is the
      number of candidate RoIs to be considered for mask sampling. It includes
      both positive and negative RoIs. The `num_mask_samples_per_image` positive
      RoIs will be sampled to create mask training targets.
    candidate_gt_boxes: A `tf.Tensor` of shape of [batch_size, N, 4], storing
      the corresponding groundtruth boxes to the `candidate_rois`.
    candidate_gt_classes: A `tf.Tensor` of shape of [batch_size, N], storing the
      corresponding groundtruth classes to the `candidate_rois`. 0 in the tensor
      corresponds to the background class, i.e. negative RoIs.
    candidate_gt_indices: A `tf.Tensor` of shape [batch_size, N], storing the
      corresponding groundtruth instance indices to the `candidate_gt_boxes`,
      i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i] and
      gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >= N, is
      the superset of candidate_gt_boxes.
    gt_masks: A `tf.Tensor` of [batch_size, MAX_INSTANCES, mask_height,
      mask_width] containing all the groundtruth masks which sample masks are
      drawn from.
    num_sampled_masks: An `int` that specifies the number of masks to sample.
    mask_target_size: An `int` that specifies the final cropped mask size after
      sampling. The output masks are resized w.r.t the sampled RoIs.

  Returns:
    foreground_rois: A `tf.Tensor` of shape of [batch_size, K, 4] storing the
      RoI that corresponds to the sampled foreground masks, where
      K = num_mask_samples_per_image.
    foreground_classes: A `tf.Tensor` of shape of [batch_size, K] storing the
      classes corresponding to the sampled foreground masks.
    cropped_foreground_masks: A `tf.Tensor` of shape of
      [batch_size, K, mask_target_size, mask_target_size] storing the cropped
      foreground masks used for training.
  """
  # top_k over a 0/1 foreground indicator picks (up to) `num_sampled_masks`
  # foreground RoI positions first; background positions fill any remainder.
  _, fg_instance_indices = tf.nn.top_k(
      tf.cast(tf.greater(candidate_gt_classes, 0), dtype=tf.int32),
      k=num_sampled_masks)

  # Pair each sampled position with its batch index so gather_nd can pull
  # per-image rows: indices have shape [batch_size, num_sampled_masks, 2].
  fg_instance_indices_shape = tf.shape(fg_instance_indices)
  batch_indices = (
      tf.expand_dims(tf.range(fg_instance_indices_shape[0]), axis=-1) *
      tf.ones([1, fg_instance_indices_shape[-1]], dtype=tf.int32))

  gather_nd_instance_indices = tf.stack(
      [batch_indices, fg_instance_indices], axis=-1)
  foreground_rois = tf.gather_nd(candidate_rois, gather_nd_instance_indices)
  foreground_boxes = tf.gather_nd(
      candidate_gt_boxes, gather_nd_instance_indices)
  foreground_classes = tf.gather_nd(
      candidate_gt_classes, gather_nd_instance_indices)
  foreground_gt_indices = tf.gather_nd(
      candidate_gt_indices, gather_nd_instance_indices)
  # -1 marks "no matched groundtruth"; clamp to 0 so the gather below stays
  # in-bounds (such entries correspond to background samples anyway).
  foreground_gt_indices = tf.where(
      tf.equal(foreground_gt_indices, -1),
      tf.zeros_like(foreground_gt_indices),
      foreground_gt_indices)

  # Second gather: map the sampled groundtruth instance indices to their masks.
  foreground_gt_indices_shape = tf.shape(foreground_gt_indices)
  batch_indices = (
      tf.expand_dims(tf.range(foreground_gt_indices_shape[0]), axis=-1) *
      tf.ones([1, foreground_gt_indices_shape[-1]], dtype=tf.int32))
  gather_nd_gt_indices = tf.stack(
      [batch_indices, foreground_gt_indices], axis=-1)
  foreground_masks = tf.gather_nd(gt_masks, gather_nd_gt_indices)

  cropped_foreground_masks = spatial_transform_ops.crop_mask_in_target_box(
      foreground_masks, foreground_boxes, foreground_rois, mask_target_size,
      sample_offset=0.5)

  return foreground_rois, foreground_classes, cropped_foreground_masks
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
MaskSampler
(
tf
.
keras
.
layers
.
Layer
):
"""Samples and creates mask training targets."""
def
__init__
(
self
,
mask_target_size
:
int
,
num_sampled_masks
:
int
,
**
kwargs
):
self
.
_config_dict
=
{
'mask_target_size'
:
mask_target_size
,
'num_sampled_masks'
:
num_sampled_masks
,
}
super
(
MaskSampler
,
self
).
__init__
(
**
kwargs
)
def
call
(
self
,
candidate_rois
:
tf
.
Tensor
,
candidate_gt_boxes
:
tf
.
Tensor
,
candidate_gt_classes
:
tf
.
Tensor
,
candidate_gt_indices
:
tf
.
Tensor
,
gt_masks
:
tf
.
Tensor
):
"""Samples and creates mask targets for training.
Args:
candidate_rois: A `tf.Tensor` of shape of [batch_size, N, 4], where N is
the number of candidate RoIs to be considered for mask sampling. It
includes both positive and negative RoIs. The
`num_mask_samples_per_image` positive RoIs will be sampled to create
mask training targets.
candidate_gt_boxes: A `tf.Tensor` of shape of [batch_size, N, 4], storing
the corresponding groundtruth boxes to the `candidate_rois`.
candidate_gt_classes: A `tf.Tensor` of shape of [batch_size, N], storing
the corresponding groundtruth classes to the `candidate_rois`. 0 in the
tensor corresponds to the background class, i.e. negative RoIs.
candidate_gt_indices: A `tf.Tensor` of shape [batch_size, N], storing the
corresponding groundtruth instance indices to the `candidate_gt_boxes`,
i.e. gt_boxes[candidate_gt_indices[:, i]] = candidate_gt_boxes[:, i],
where gt_boxes which is of shape [batch_size, MAX_INSTANCES, 4], M >=
N, is the superset of candidate_gt_boxes.
gt_masks: A `tf.Tensor` of [batch_size, MAX_INSTANCES, mask_height,
mask_width] containing all the groundtruth masks which sample masks are
drawn from. after sampling. The output masks are resized w.r.t the
sampled RoIs.
Returns:
foreground_rois: A `tf.Tensor` of shape of [batch_size, K, 4] storing the
RoI that corresponds to the sampled foreground masks, where
K = num_mask_samples_per_image.
foreground_classes: A `tf.Tensor` of shape of [batch_size, K] storing the
classes corresponding to the sampled foreground masks.
cropoped_foreground_masks: A `tf.Tensor` of shape of
[batch_size, K, mask_target_size, mask_target_size] storing the
cropped foreground masks used for training.
"""
foreground_rois
,
foreground_classes
,
cropped_foreground_masks
=
(
_sample_and_crop_foreground_masks
(
candidate_rois
,
candidate_gt_boxes
,
candidate_gt_classes
,
candidate_gt_indices
,
gt_masks
,
self
.
_config_dict
[
'num_sampled_masks'
],
self
.
_config_dict
[
'mask_target_size'
]))
return
foreground_rois
,
foreground_classes
,
cropped_foreground_masks
def
get_config
(
self
):
return
self
.
_config_dict
  @classmethod
  def from_config(cls, config):
    """Creates a `MaskSampler` from a config produced by `get_config`.

    Args:
      config: A `dict` of constructor keyword arguments.

    Returns:
      A new `MaskSampler` instance.
    """
    return cls(**config)
official/vision/modeling/layers/mask_sampler_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mask_sampler.py."""
# Import libraries
import
numpy
as
np
import
tensorflow
as
tf
from
official.vision.modeling.layers
import
mask_sampler
class SampleAndCropForegroundMasksTest(tf.test.TestCase):
  """Checks `_sample_and_crop_foreground_masks` output shapes on TPU and CPU."""

  def test_sample_and_crop_foreground_masks(self):
    # One image with four candidate RoIs; RoIs 0 and 3 have foreground classes
    # (4 and 2 below) and valid groundtruth indices, RoIs 1 and 2 are
    # background (class 0, index -1).
    candidate_rois_np = np.array([[[0, 0, 0.5, 0.5], [0.5, 0.5, 1, 1],
                                   [2, 2, 4, 4], [1, 1, 5, 5]]])
    candidate_rois = tf.constant(candidate_rois_np, dtype=tf.float32)
    candidate_gt_boxes_np = np.array([[[0, 0, 0.6, 0.6], [0, 0, 0, 0],
                                       [1, 1, 3, 3], [1, 1, 3, 3]]])
    candidate_gt_boxes = tf.constant(candidate_gt_boxes_np, dtype=tf.float32)
    candidate_gt_classes_np = np.array([[4, 0, 0, 2]])
    candidate_gt_classes = tf.constant(
        candidate_gt_classes_np, dtype=tf.float32)
    candidate_gt_indices_np = np.array([[10, -1, -1, 20]])
    candidate_gt_indices = tf.constant(candidate_gt_indices_np, dtype=tf.int32)
    # 100 groundtruth instance masks of size 32x32 to sample/crop from.
    gt_masks_np = np.random.rand(1, 100, 32, 32)
    gt_masks = tf.constant(gt_masks_np, dtype=tf.float32)
    num_mask_samples_per_image = 2
    mask_target_size = 28

    # Runs on TPU.
    # NOTE(review): `tf.distribute.TPUStrategy()` with no cluster resolver
    # needs TPU hardware; this test presumably only runs on TPU test targets —
    # confirm, as it will fail on CPU/GPU-only hosts.
    strategy = tf.distribute.TPUStrategy()
    with strategy.scope():
      foreground_rois, foreground_classes, cropped_foreground_masks = (
          mask_sampler._sample_and_crop_foreground_masks(
              candidate_rois, candidate_gt_boxes, candidate_gt_classes,
              candidate_gt_indices, gt_masks, num_mask_samples_per_image,
              mask_target_size))
      foreground_rois_tpu = foreground_rois.numpy()
      foreground_classes_tpu = foreground_classes.numpy()
      cropped_foreground_masks_tpu = cropped_foreground_masks.numpy()

    # Same call outside the strategy scope (default/CPU placement).
    foreground_rois, foreground_classes, cropped_foreground_masks = (
        mask_sampler._sample_and_crop_foreground_masks(
            candidate_rois, candidate_gt_boxes, candidate_gt_classes,
            candidate_gt_indices, gt_masks, num_mask_samples_per_image,
            mask_target_size))
    foreground_rois_cpu = foreground_rois.numpy()
    foreground_classes_cpu = foreground_classes.numpy()
    cropped_foreground_masks_cpu = cropped_foreground_masks.numpy()

    # consistency.
    # TPU and CPU runs must agree on output shapes.
    self.assertAllEqual(foreground_rois_tpu.shape, foreground_rois_cpu.shape)
    self.assertAllEqual(foreground_classes_tpu.shape,
                        foreground_classes_cpu.shape)
    self.assertAllEqual(cropped_foreground_masks_tpu.shape,
                        cropped_foreground_masks_cpu.shape)
    # correctnesss.
    # K = num_mask_samples_per_image = 2, masks resized to 28x28.
    self.assertAllEqual(foreground_rois_tpu.shape, [1, 2, 4])
    self.assertAllEqual(foreground_classes_tpu.shape, [1, 2])
    self.assertAllEqual(cropped_foreground_masks_tpu.shape, [1, 2, 28, 28])
class MaskSamplerTest(tf.test.TestCase):
  """Tests for the `MaskSampler` layer."""

  def test_mask_sampler(self):
    # One image, four candidate RoIs: two foreground (classes 4 and 2 with
    # valid groundtruth indices), two background (class 0, index -1).
    rois = tf.constant(
        np.array([[[0, 0, 0.5, 0.5], [0.5, 0.5, 1, 1], [2, 2, 4, 4],
                   [1, 1, 5, 5]]]),
        dtype=tf.float32)
    gt_boxes = tf.constant(
        np.array([[[0, 0, 0.6, 0.6], [0, 0, 0, 0], [1, 1, 3, 3],
                   [1, 1, 3, 3]]]),
        dtype=tf.float32)
    gt_classes = tf.constant(np.array([[4, 0, 0, 2]]), dtype=tf.float32)
    gt_indices = tf.constant(np.array([[10, -1, -1, 20]]), dtype=tf.int32)
    # 100 groundtruth instance masks of size 32x32 to draw samples from.
    gt_masks = tf.constant(np.random.rand(1, 100, 32, 32), dtype=tf.float32)

    sampler = mask_sampler.MaskSampler(28, 2)
    outputs = sampler(rois, gt_boxes, gt_classes, gt_indices, gt_masks)

    # correctnesss.
    # Expected shapes: K=2 sampled masks, mask target size 28.
    expected_shapes = ([1, 2, 4], [1, 2], [1, 2, 28, 28])
    for output, expected_shape in zip(outputs, expected_shapes):
      self.assertAllEqual(output.numpy().shape, expected_shape)

  def test_serialize_deserialize(self):
    init_kwargs = dict(
        mask_target_size=7,
        num_sampled_masks=10,
    )
    original = mask_sampler.MaskSampler(**init_kwargs)
    # get_config must round-trip the constructor arguments exactly.
    self.assertEqual(original.get_config(), dict(init_kwargs))

    restored = mask_sampler.MaskSampler.from_config(original.get_config())
    self.assertAllEqual(original.get_config(), restored.get_config())
# Entry point: discover and run all tf.test.TestCase classes in this module.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/layers/nn_blocks.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common building blocks for neural networks."""
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Tuple
,
Union
,
Text
# Import libraries
from
absl
import
logging
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.vision.modeling.layers
import
nn_layers
def
_pad_strides
(
strides
:
int
,
axis
:
int
)
->
Tuple
[
int
,
int
,
int
,
int
]:
"""Converts int to len 4 strides (`tf.nn.avg_pool` uses length 4)."""
if
axis
==
1
:
return
(
1
,
1
,
strides
,
strides
)
else
:
return
(
1
,
strides
,
strides
,
1
)
def _maybe_downsample(x: tf.Tensor, out_filter: int, strides: int,
                      axis: int) -> tf.Tensor:
  """Downsamples feature map and 0-pads tensor if in_filter != out_filter.

  Args:
    x: A 4-D feature `tf.Tensor` in NCHW (`axis == 1`) or NHWC layout.
    out_filter: An `int` target channel count to zero-pad up to.
    strides: An `int` spatial stride for the average-pool downsample.
    axis: The channel axis of `x` (1 for NCHW, otherwise NHWC assumed).

  Returns:
    The pooled (and possibly channel-padded) tensor.
  """
  pool_strides = _pad_strides(strides, axis=axis)
  x = tf.nn.avg_pool(
      x,
      pool_strides,
      pool_strides,
      'VALID',
      data_format='NCHW' if axis == 1 else 'NHWC')

  in_filter = x.shape[axis]
  if in_filter < out_filter:
    # Pad on channel dimension with 0s: half on top half on bottom.
    half = (out_filter - in_filter) // 2
    channel_pad = [half, half]
    zero_pad = [0, 0]
    if axis == 1:
      paddings = [zero_pad, channel_pad, zero_pad, zero_pad]
    else:
      paddings = [zero_pad, zero_pad, zero_pad, channel_pad]
    x = tf.pad(x, paddings)

  # `+ 0.` kept from the original; presumably forces a new float tensor —
  # confirm before removing.
  return x + 0.
@tf.keras.utils.register_keras_serializable(package='Vision')
class ResidualBlock(tf.keras.layers.Layer):
  """A residual block: two 3x3 convs with BN plus an (optional projection) shortcut."""

  def __init__(self,
               filters,
               strides,
               use_projection=False,
               se_ratio=None,
               resnetd_shortcut=False,
               stochastic_depth_drop_rate=None,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_explicit_padding: bool = False,
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               bn_trainable=True,
               **kwargs):
    """Initializes a residual block with BN after convolutions.

    Args:
      filters: An `int` number of output filters for both 3x3 convolutions in
        this block (and for the projection shortcut, if used).
      strides: An `int` block stride. If greater than 1, this block will
        ultimately downsample the input.
      use_projection: A `bool` for whether this block should use a projection
        shortcut (versus the default identity shortcut). This is usually `True`
        for the first block of a block group, which may change the number of
        filters and the resolution.
      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
      resnetd_shortcut: A `bool` if True, apply the resnetd style modification
        to the shortcut connection. Not implemented in residual blocks.
      stochastic_depth_drop_rate: A `float` or None. if not None, drop rate for
        the stochastic depth layer.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
        inputs so that the output dimensions are the same as if 'SAME' padding
        were used.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      bn_trainable: A `bool` that indicates whether batch norm layers should be
        trainable. Default to True.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(ResidualBlock, self).__init__(**kwargs)

    self._filters = filters
    self._strides = strides
    self._use_projection = use_projection
    self._se_ratio = se_ratio
    self._resnetd_shortcut = resnetd_shortcut
    self._use_explicit_padding = use_explicit_padding
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    # The norm *class* is chosen here; instances are created in build().
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation_fn = tf_utils.get_activation(activation)
    self._bn_trainable = bn_trainable

  def build(self, input_shape):
    # NOTE: sub-layer creation order here fixes variable ordering; keep it
    # stable for checkpoint compatibility.
    if self._use_projection:
      # 1x1 projection shortcut matches the (possibly strided) main path.
      self._shortcut = tf.keras.layers.Conv2D(
          filters=self._filters,
          kernel_size=1,
          strides=self._strides,
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)
      self._norm0 = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon,
          trainable=self._bn_trainable)

    conv1_padding = 'same'
    # explicit padding here is added for centernet
    if self._use_explicit_padding:
      self._pad = tf.keras.layers.ZeroPadding2D(padding=(1, 1))
      conv1_padding = 'valid'

    # First 3x3 conv carries the block stride.
    self._conv1 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=self._strides,
        padding=conv1_padding,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)

    # Second 3x3 conv is always stride 1.
    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=1,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)

    # SE is only enabled for ratios in (0, 1].
    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
      self._squeeze_excitation = nn_layers.SqueezeExcitation(
          in_filters=self._filters,
          out_filters=self._filters,
          se_ratio=self._se_ratio,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)
    else:
      self._squeeze_excitation = None

    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)
    else:
      self._stochastic_depth = None

    super(ResidualBlock, self).build(input_shape)

  def get_config(self):
    """Returns the constructor arguments needed to rebuild this layer."""
    config = {
        'filters': self._filters,
        'strides': self._strides,
        'use_projection': self._use_projection,
        'se_ratio': self._se_ratio,
        'resnetd_shortcut': self._resnetd_shortcut,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_explicit_padding': self._use_explicit_padding,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'bn_trainable': self._bn_trainable
    }
    base_config = super(ResidualBlock, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs, training=None):
    """Runs the block: conv-BN-act, conv-BN, optional SE/drop, add shortcut."""
    shortcut = inputs
    if self._use_projection:
      shortcut = self._shortcut(shortcut)
      shortcut = self._norm0(shortcut)

    if self._use_explicit_padding:
      inputs = self._pad(inputs)
    x = self._conv1(inputs)
    x = self._norm1(x)
    x = self._activation_fn(x)

    x = self._conv2(x)
    x = self._norm2(x)

    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)

    if self._stochastic_depth:
      x = self._stochastic_depth(x, training=training)

    # Final activation is applied after the residual addition.
    return self._activation_fn(x + shortcut)
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock(tf.keras.layers.Layer):
  """A standard bottleneck block: 1x1 reduce, 3x3, 1x1 expand (4x filters)."""

  def __init__(self,
               filters,
               strides,
               dilation_rate=1,
               use_projection=False,
               se_ratio=None,
               resnetd_shortcut=False,
               stochastic_depth_drop_rate=None,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               bn_trainable=True,
               **kwargs):
    """Initializes a standard bottleneck block with BN after convolutions.

    Args:
      filters: An `int` number of filters for the first two convolutions. Note
        that the third and final convolution will use 4 times as many filters.
      strides: An `int` block stride. If greater than 1, this block will
        ultimately downsample the input.
      dilation_rate: An `int` dilation_rate of convolutions. Default to 1.
      use_projection: A `bool` for whether this block should use a projection
        shortcut (versus the default identity shortcut). This is usually `True`
        for the first block of a block group, which may change the number of
        filters and the resolution.
      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
      resnetd_shortcut: A `bool`. If True, apply the resnetd style modification
        to the shortcut connection.
      stochastic_depth_drop_rate: A `float` or None. If not None, drop rate for
        the stochastic depth layer.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      bn_trainable: A `bool` that indicates whether batch norm layers should be
        trainable. Default to True.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(BottleneckBlock, self).__init__(**kwargs)

    self._filters = filters
    self._strides = strides
    self._dilation_rate = dilation_rate
    self._use_projection = use_projection
    self._se_ratio = se_ratio
    self._resnetd_shortcut = resnetd_shortcut
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    # The norm *class* is chosen here; instances are created in build().
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._bn_trainable = bn_trainable

  def build(self, input_shape):
    # NOTE: sub-layer creation order here fixes variable ordering; keep it
    # stable for checkpoint compatibility.
    if self._use_projection:
      if self._resnetd_shortcut:
        # ResNet-D: downsample with avg-pool, then a stride-1 1x1 conv.
        self._shortcut0 = tf.keras.layers.AveragePooling2D(
            pool_size=2, strides=self._strides, padding='same')
        self._shortcut1 = tf.keras.layers.Conv2D(
            filters=self._filters * 4,
            kernel_size=1,
            strides=1,
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer)
      else:
        # Plain strided 1x1 projection shortcut.
        self._shortcut = tf.keras.layers.Conv2D(
            filters=self._filters * 4,
            kernel_size=1,
            strides=self._strides,
            use_bias=False,
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            bias_regularizer=self._bias_regularizer)

      self._norm0 = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon,
          trainable=self._bn_trainable)

    # 1x1 channel-reduction conv.
    self._conv1 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)
    self._activation1 = tf_utils.get_activation(
        self._activation, use_keras_layer=True)

    # 3x3 spatial conv carries the block stride and dilation.
    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=3,
        strides=self._strides,
        dilation_rate=self._dilation_rate,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)
    self._activation2 = tf_utils.get_activation(
        self._activation, use_keras_layer=True)

    # 1x1 channel-expansion conv (4x filters).
    self._conv3 = tf.keras.layers.Conv2D(
        filters=self._filters * 4,
        kernel_size=1,
        strides=1,
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm3 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon,
        trainable=self._bn_trainable)
    self._activation3 = tf_utils.get_activation(
        self._activation, use_keras_layer=True)

    # SE is only enabled for ratios in (0, 1]; operates on expanded channels.
    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
      self._squeeze_excitation = nn_layers.SqueezeExcitation(
          in_filters=self._filters * 4,
          out_filters=self._filters * 4,
          se_ratio=self._se_ratio,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)
    else:
      self._squeeze_excitation = None

    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)
    else:
      self._stochastic_depth = None
    self._add = tf.keras.layers.Add()

    super(BottleneckBlock, self).build(input_shape)

  def get_config(self):
    """Returns the constructor arguments needed to rebuild this layer."""
    config = {
        'filters': self._filters,
        'strides': self._strides,
        'dilation_rate': self._dilation_rate,
        'use_projection': self._use_projection,
        'se_ratio': self._se_ratio,
        'resnetd_shortcut': self._resnetd_shortcut,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'bn_trainable': self._bn_trainable
    }
    base_config = super(BottleneckBlock, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs, training=None):
    """Runs the bottleneck: 1x1-3x3-1x1 with BN/act, SE/drop, add shortcut."""
    shortcut = inputs
    if self._use_projection:
      if self._resnetd_shortcut:
        shortcut = self._shortcut0(shortcut)
        shortcut = self._shortcut1(shortcut)
      else:
        shortcut = self._shortcut(shortcut)
      shortcut = self._norm0(shortcut)

    x = self._conv1(inputs)
    x = self._norm1(x)
    x = self._activation1(x)

    x = self._conv2(x)
    x = self._norm2(x)
    x = self._activation2(x)

    x = self._conv3(x)
    x = self._norm3(x)

    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)

    if self._stochastic_depth:
      x = self._stochastic_depth(x, training=training)

    x = self._add([x, shortcut])
    # Final activation is applied after the residual addition.
    return self._activation3(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class InvertedBottleneckBlock(tf.keras.layers.Layer):
  """An inverted bottleneck block: 1x1 expand, depthwise 3x3, 1x1 project."""

  def __init__(self,
               in_filters,
               out_filters,
               expand_ratio,
               strides,
               kernel_size=3,
               se_ratio=None,
               stochastic_depth_drop_rate=None,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               se_inner_activation='relu',
               se_gating_activation='sigmoid',
               se_round_down_protect=True,
               expand_se_in_filters=False,
               depthwise_activation=None,
               use_sync_bn=False,
               dilation_rate=1,
               divisible_by=1,
               regularize_depthwise=False,
               use_depthwise=True,
               use_residual=True,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               output_intermediate_endpoints=False,
               **kwargs):
    """Initializes an inverted bottleneck block with BN after convolutions.

    Args:
      in_filters: An `int` number of filters of the input tensor.
      out_filters: An `int` number of filters of the output tensor.
      expand_ratio: An `int` of expand_ratio for an inverted bottleneck block.
      strides: An `int` block stride. If greater than 1, this block will
        ultimately downsample the input.
      kernel_size: An `int` kernel_size of the depthwise conv layer.
      se_ratio: A `float` or None. If not None, se ratio for the squeeze and
        excitation layer.
      stochastic_depth_drop_rate: A `float` or None. if not None, drop rate for
        the stochastic depth layer.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      se_inner_activation: A `str` name of squeeze-excitation inner activation.
      se_gating_activation: A `str` name of squeeze-excitation gating
        activation.
      se_round_down_protect: A `bool` of whether round down more than 10%
        will be allowed in SE layer.
      expand_se_in_filters: A `bool` of whether or not to expand in_filter in
        squeeze and excitation layer.
      depthwise_activation: A `str` name of the activation function for
        depthwise only.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      dilation_rate: An `int` dilation rate for the depthwise convolution; a
        single value is used for all spatial dimensions.
      divisible_by: An `int` that ensures all inner dimensions are divisible by
        this number.
      regularize_depthwise: A `bool` of whether or not apply regularization on
        depthwise.
      use_depthwise: A `bool` of whether to uses fused convolutions instead of
        depthwise.
      use_residual: A `bool` of whether to include residual connection between
        input and output.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      output_intermediate_endpoints: A `bool` of whether or not output the
        intermediate endpoints.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(InvertedBottleneckBlock, self).__init__(**kwargs)

    self._in_filters = in_filters
    self._out_filters = out_filters
    self._expand_ratio = expand_ratio
    self._strides = strides
    self._kernel_size = kernel_size
    self._se_ratio = se_ratio
    self._divisible_by = divisible_by
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._dilation_rate = dilation_rate
    self._use_sync_bn = use_sync_bn
    self._regularize_depthwise = regularize_depthwise
    self._use_depthwise = use_depthwise
    self._use_residual = use_residual
    self._activation = activation
    self._se_inner_activation = se_inner_activation
    self._se_gating_activation = se_gating_activation
    self._depthwise_activation = depthwise_activation
    self._se_round_down_protect = se_round_down_protect
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    self._expand_se_in_filters = expand_se_in_filters
    self._output_intermediate_endpoints = output_intermediate_endpoints
    # The norm *class* is chosen here; instances are created in build().
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    # Depthwise activation falls back to the main activation when unset.
    if not depthwise_activation:
      self._depthwise_activation = activation
    if regularize_depthwise:
      self._depthsize_regularizer = kernel_regularizer
    else:
      self._depthsize_regularizer = None

  def build(self, input_shape):
    # NOTE: sub-layer creation order here fixes variable ordering; keep it
    # stable for checkpoint compatibility.
    expand_filters = self._in_filters
    if self._expand_ratio > 1:
      # First 1x1 conv for channel expansion.
      expand_filters = nn_layers.make_divisible(
          self._in_filters * self._expand_ratio, self._divisible_by)

      # Fused (non-depthwise) variant folds kernel size and stride into the
      # expansion conv.
      expand_kernel = 1 if self._use_depthwise else self._kernel_size
      expand_stride = 1 if self._use_depthwise else self._strides

      self._conv0 = tf.keras.layers.Conv2D(
          filters=expand_filters,
          kernel_size=expand_kernel,
          strides=expand_stride,
          padding='same',
          use_bias=False,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)
      self._norm0 = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)
      self._activation_layer = tf_utils.get_activation(
          self._activation, use_keras_layer=True)

    if self._use_depthwise:
      # Depthwise conv.
      self._conv1 = tf.keras.layers.DepthwiseConv2D(
          kernel_size=(self._kernel_size, self._kernel_size),
          strides=self._strides,
          padding='same',
          depth_multiplier=1,
          dilation_rate=self._dilation_rate,
          use_bias=False,
          depthwise_initializer=self._kernel_initializer,
          depthwise_regularizer=self._depthsize_regularizer,
          bias_regularizer=self._bias_regularizer)
      self._norm1 = self._norm(
          axis=self._bn_axis,
          momentum=self._norm_momentum,
          epsilon=self._norm_epsilon)
      self._depthwise_activation_layer = tf_utils.get_activation(
          self._depthwise_activation, use_keras_layer=True)

    # Squeeze and excitation.
    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
      logging.info('Use Squeeze and excitation.')
      in_filters = self._in_filters
      if self._expand_se_in_filters:
        in_filters = expand_filters
      self._squeeze_excitation = nn_layers.SqueezeExcitation(
          in_filters=in_filters,
          out_filters=expand_filters,
          se_ratio=self._se_ratio,
          divisible_by=self._divisible_by,
          round_down_protect=self._se_round_down_protect,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer,
          activation=self._se_inner_activation,
          gating_activation=self._se_gating_activation)
    else:
      self._squeeze_excitation = None

    # Last 1x1 conv.
    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._out_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)
    else:
      self._stochastic_depth = None
    self._add = tf.keras.layers.Add()

    super(InvertedBottleneckBlock, self).build(input_shape)

  def get_config(self):
    """Returns the constructor arguments needed to rebuild this layer."""
    config = {
        'in_filters': self._in_filters,
        'out_filters': self._out_filters,
        'expand_ratio': self._expand_ratio,
        'strides': self._strides,
        'kernel_size': self._kernel_size,
        'se_ratio': self._se_ratio,
        'divisible_by': self._divisible_by,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'se_inner_activation': self._se_inner_activation,
        'se_gating_activation': self._se_gating_activation,
        'se_round_down_protect': self._se_round_down_protect,
        'expand_se_in_filters': self._expand_se_in_filters,
        'depthwise_activation': self._depthwise_activation,
        'dilation_rate': self._dilation_rate,
        'use_sync_bn': self._use_sync_bn,
        'regularize_depthwise': self._regularize_depthwise,
        'use_depthwise': self._use_depthwise,
        'use_residual': self._use_residual,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    base_config = super(InvertedBottleneckBlock, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs, training=None):
    """Runs expand -> depthwise -> SE -> project, with optional residual."""
    endpoints = {}
    shortcut = inputs
    if self._expand_ratio > 1:
      x = self._conv0(inputs)
      x = self._norm0(x)
      x = self._activation_layer(x)
    else:
      # No expansion: the depthwise (or projection) conv consumes the input
      # directly.
      x = inputs

    if self._use_depthwise:
      x = self._conv1(x)
      x = self._norm1(x)
      x = self._depthwise_activation_layer(x)
      if self._output_intermediate_endpoints:
        endpoints['depthwise'] = x

    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)

    x = self._conv2(x)
    x = self._norm2(x)

    # Residual only when shapes match: same channel count and stride 1.
    if (self._use_residual and self._in_filters == self._out_filters and
        self._strides == 1):
      if self._stochastic_depth:
        x = self._stochastic_depth(x, training=training)
      x = self._add([x, shortcut])

    if self._output_intermediate_endpoints:
      return x, endpoints
    return x
@tf.keras.utils.register_keras_serializable(package='Vision')
class ResidualInner(tf.keras.layers.Layer):
  """A single inner block of a reversible residual.

  This corresponds to the `F`/`G` functions in the RevNet paper:
  Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse.
  The Reversible Residual Network: Backpropagation Without Storing Activations.
  (https://arxiv.org/pdf/1707.04585.pdf)
  """

  def __init__(
      self,
      filters: int,
      strides: int,
      kernel_initializer: Union[str, Callable[
          ..., tf.keras.initializers.Initializer]] = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: Union[str, Callable[..., tf.Tensor]] = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      batch_norm_first: bool = True,
      **kwargs):
    """Initializes a ResidualInner.

    Args:
      filters: An `int` of output filter size.
      strides: An `int` of stride size for convolution for the residual block.
      kernel_initializer: A `str` or `tf.keras.initializers.Initializer`
        instance for convolutional layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` for Conv2D.
      activation: A `str` or `callable` instance of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      batch_norm_first: A `bool` of whether to apply activation and batch norm
        before conv.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    # `strides` and `filters` are intentionally public: ReversibleLayer reads
    # `self._f.strides` on its inner blocks.
    self.strides = strides
    self.filters = filters
    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._kernel_regularizer = kernel_regularizer
    self._activation = tf.keras.activations.get(activation)
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._batch_norm_first = batch_norm_first
    # Select the batch-norm implementation and the channel axis up front.
    self._norm = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    self._bn_axis = (
        -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1)
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape: tf.TensorShape):
    """Creates the two conv/BN stages of the inner residual function."""
    norm_kwargs = dict(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)
    conv_kwargs = dict(
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer)

    if self._batch_norm_first:
      self._batch_norm_0 = self._norm(**norm_kwargs)
    # First 3x3 conv carries the block stride; the second is stride 1.
    self._conv2d_1 = tf.keras.layers.Conv2D(
        filters=self.filters,
        kernel_size=3,
        strides=self.strides,
        **conv_kwargs)
    self._batch_norm_1 = self._norm(**norm_kwargs)
    self._conv2d_2 = tf.keras.layers.Conv2D(
        filters=self.filters,
        kernel_size=3,
        strides=1,
        **conv_kwargs)

    super().build(input_shape)

  def get_config(self) -> Dict[str, Any]:
    """Returns the constructor arguments of this layer."""
    config = {
        'filters': self.filters,
        'strides': self.strides,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'batch_norm_first': self._batch_norm_first,
    }
    return {**super().get_config(), **config}

  def call(self,
           inputs: tf.Tensor,
           training: Optional[bool] = None) -> tf.Tensor:
    """Runs the forward pass: (BN-act)? -> conv -> BN-act -> conv."""
    x = inputs
    if self._batch_norm_first:
      x = self._activation_fn(self._batch_norm_0(x, training=training))
    x = self._conv2d_1(x)
    x = self._activation_fn(self._batch_norm_1(x, training=training))
    return self._conv2d_2(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckResidualInner(tf.keras.layers.Layer):
  """A single inner block of a reversible bottleneck residual.

  This corresponds to the `F`/`G` functions in the RevNet paper:
  Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse.
  The Reversible Residual Network: Backpropagation Without Storing Activations.
  (https://arxiv.org/pdf/1707.04585.pdf)
  """

  def __init__(
      self,
      filters: int,
      strides: int,
      kernel_initializer: Union[str, Callable[
          ..., tf.keras.initializers.Initializer]] = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      activation: Union[str, Callable[..., tf.Tensor]] = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      batch_norm_first: bool = True,
      **kwargs):
    """Initializes a BottleneckResidualInner.

    Args:
      filters: An `int` number of filters for the first two convolutions. The
        third and final convolution — and thus the number of output channels
        of the bottleneck block — uses `4 * filters`.
      strides: An `int` of stride size for convolution for the residual block.
      kernel_initializer: A `str` or `tf.keras.initializers.Initializer`
        instance for convolutional layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` for Conv2D.
      activation: A `str` or `callable` instance of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      batch_norm_first: A `bool` of whether to apply activation and batch norm
        before conv.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    # `strides` and `filters` are intentionally public: ReversibleLayer reads
    # `self._f.strides` on its inner blocks.
    self.strides = strides
    self.filters = filters
    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._kernel_regularizer = kernel_regularizer
    self._activation = tf.keras.activations.get(activation)
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._batch_norm_first = batch_norm_first
    # Select the batch-norm implementation and the channel axis up front.
    self._norm = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    self._bn_axis = (
        -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1)
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape: tf.TensorShape):
    """Creates the 1x1 -> 3x3 -> 1x1 bottleneck conv/BN stack."""
    norm_kwargs = dict(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)
    conv_kwargs = dict(
        use_bias=False,
        padding='same',
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer)

    if self._batch_norm_first:
      self._batch_norm_0 = self._norm(**norm_kwargs)
    # 1x1 reduction conv carries the block stride.
    self._conv2d_1 = tf.keras.layers.Conv2D(
        filters=self.filters,
        kernel_size=1,
        strides=self.strides,
        **conv_kwargs)
    self._batch_norm_1 = self._norm(**norm_kwargs)
    # 3x3 spatial conv at stride 1.
    self._conv2d_2 = tf.keras.layers.Conv2D(
        filters=self.filters,
        kernel_size=3,
        strides=1,
        **conv_kwargs)
    self._batch_norm_2 = self._norm(**norm_kwargs)
    # 1x1 expansion conv to 4x filters.
    self._conv2d_3 = tf.keras.layers.Conv2D(
        filters=self.filters * 4,
        kernel_size=1,
        strides=1,
        **conv_kwargs)

    super().build(input_shape)

  def get_config(self) -> Dict[str, Any]:
    """Returns the constructor arguments of this layer."""
    config = {
        'filters': self.filters,
        'strides': self.strides,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon,
        'batch_norm_first': self._batch_norm_first,
    }
    return {**super().get_config(), **config}

  def call(self,
           inputs: tf.Tensor,
           training: Optional[bool] = None) -> tf.Tensor:
    """Runs the forward pass: (BN-act)? -> conv1 -> BN-act -> conv2 -> BN-act -> conv3."""
    x = inputs
    if self._batch_norm_first:
      x = self._activation_fn(self._batch_norm_0(x, training=training))
    x = self._conv2d_1(x)
    x = self._activation_fn(self._batch_norm_1(x, training=training))
    x = self._conv2d_2(x)
    x = self._activation_fn(self._batch_norm_2(x, training=training))
    return self._conv2d_3(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class ReversibleLayer(tf.keras.layers.Layer):
  """Creates a reversible layer.

  Computes y1 = x1 + f(x2), y2 = x2 + g(y1), where f and g can be arbitrary
  layers that are stateless, which in this case are `ResidualInner` layers.
  """

  def __init__(self,
               f: tf.keras.layers.Layer,
               g: tf.keras.layers.Layer,
               manual_grads: bool = True,
               **kwargs):
    """Initializes a ReversibleLayer.

    Args:
      f: A `tf.keras.layers.Layer` instance of `f` inner block referred to in
        paper. Each reversible layer consists of two inner functions. For
        example, in RevNet the reversible residual consists of two f/g inner
        (bottleneck) residual functions. Where the input to the reversible layer
        is x, the input gets partitioned in the channel dimension and the
        forward pass follows (eq8): x = [x1; x2], z1 = x1 + f(x2), y2 = x2 +
        g(z1), y1 = stop_gradient(z1).
      g: A `tf.keras.layers.Layer` instance of `g` inner block referred to in
        paper. Detailed explanation same as above as `f` arg.
      manual_grads: A `bool` [Testing Only] of whether to manually take
        gradients as in Algorithm 1 or defer to autograd.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(ReversibleLayer, self).__init__(**kwargs)

    self._f = f
    self._g = g
    self._manual_grads = manual_grads

    # Channel axis used for splitting/concatenating the two halves.
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._axis = -1
    else:
      self._axis = 1

  def get_config(self) -> Dict[str, Any]:
    """Returns the constructor arguments (f/g layers and manual_grads flag)."""
    config = {
        'f': self._f,
        'g': self._g,
        'manual_grads': self._manual_grads,
    }
    base_config = super(ReversibleLayer, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def _ckpt_non_trainable_vars(self):
    """Snapshots f/g non-trainable variables (e.g. BN moving stats).

    The snapshots are restored in `_load_ckpt_non_trainable_vars` so that the
    recomputation of f/g inside the custom backward pass does not double-update
    them.
    """
    self._f_non_trainable_vars = [
        v.read_value() for v in self._f.non_trainable_variables
    ]
    self._g_non_trainable_vars = [
        v.read_value() for v in self._g.non_trainable_variables
    ]

  def _load_ckpt_non_trainable_vars(self):
    """Restores f/g non-trainable variables from the last snapshot."""
    for v, v_chkpt in zip(self._f.non_trainable_variables,
                          self._f_non_trainable_vars):
      v.assign(v_chkpt)
    for v, v_chkpt in zip(self._g.non_trainable_variables,
                          self._g_non_trainable_vars):
      v.assign(v_chkpt)

  def call(self,
           inputs: tf.Tensor,
           training: Optional[bool] = None) -> tf.Tensor:
    """Runs the reversible forward pass with a custom (memory-saving) gradient.

    Args:
      inputs: The input tensor; split into two halves along the channel axis.
      training: A `bool` or None; forwarded to the f/g inner blocks.

    Returns:
      The concatenated output tensor [y1; y2] of Algorithm 1.
    """

    @tf.custom_gradient
    def reversible(
        x: tf.Tensor
    ) -> Tuple[tf.Tensor, Callable[[Any], Tuple[List[tf.Tensor],
                                                List[tf.Tensor]]]]:
      """Implements Algorithm 1 in the RevNet paper.

         Aidan N. Gomez, Mengye Ren, Raquel Urtasun, Roger B. Grosse.
         The Reversible Residual Network: Backpropagation Without Storing
         Activations.
         (https://arxiv.org/pdf/1707.04585.pdf)

      Args:
        x: An input `tf.Tensor.

      Returns:
        y: The output [y1; y2] in Algorithm 1.
        grad_fn: A callable function that computes the gradients.
      """
      # The forward tape is only consulted on the fallback (autograd) path in
      # grad_fn below; the manual path recomputes activations instead.
      with tf.GradientTape() as fwdtape:
        fwdtape.watch(x)
        x1, x2 = tf.split(x, num_or_size_splits=2, axis=self._axis)
        f_x2 = self._f(x2, training=training)
        # When f/g downsample, match the shortcut's shape to the block output.
        x1_down = _maybe_downsample(x1, f_x2.shape[self._axis], self._f.strides,
                                    self._axis)
        z1 = f_x2 + x1_down
        g_z1 = self._g(z1, training=training)
        x2_down = _maybe_downsample(x2, g_z1.shape[self._axis], self._f.strides,
                                    self._axis)
        y2 = x2_down + g_z1

        # Equation 8: https://arxiv.org/pdf/1707.04585.pdf
        # Decouple y1 and z1 so that their derivatives are different.
        y1 = tf.identity(z1)
        y = tf.concat([y1, y2], axis=self._axis)

        # The layer is only invertible when shapes are preserved; otherwise
        # grad_fn must fall back to the recorded tape.
        irreversible = ((self._f.strides != 1 or self._g.strides != 1) or
                        (y.shape[self._axis] != inputs.shape[self._axis]))

        # Checkpointing moving mean/variance for batch normalization layers
        # as they shouldn't be updated during the custom gradient pass of f/g.
        self._ckpt_non_trainable_vars()

      def grad_fn(
          dy: tf.Tensor,
          variables: Optional[List[tf.Variable]] = None,
      ) -> Tuple[List[tf.Tensor], List[tf.Tensor]]:
        """Given dy calculate (dy/dx)|_{x_{input}} using f/g."""
        if irreversible or not self._manual_grads:
          # Fallback: ordinary autograd through the recorded forward tape.
          grads_combined = fwdtape.gradient(
              y, [x] + variables, output_gradients=dy)
          dx = grads_combined[0]
          grad_vars = grads_combined[1:]
        else:
          # Manual path: reconstruct intermediate activations from the outputs
          # instead of storing them (the memory saving of RevNet).
          y1_nograd = tf.stop_gradient(y1)
          y2_nograd = tf.stop_gradient(y2)
          dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=self._axis)

          # Index mapping from self.f/g.trainable_variables to grad_fn
          # input `variables` kwarg so that we can reorder dwf + dwg
          # variable gradient list to match `variables` order.
          f_var_refs = [v.ref() for v in self._f.trainable_variables]
          g_var_refs = [v.ref() for v in self._g.trainable_variables]
          fg_var_refs = f_var_refs + g_var_refs
          self_to_var_index = [fg_var_refs.index(v.ref()) for v in variables]

          # Algorithm 1 in paper (line # documented in-line)
          z1 = y1_nograd  # line 2
          with tf.GradientTape() as gtape:
            gtape.watch(z1)
            g_z1 = self._g(z1, training=training)
          x2 = y2_nograd - g_z1  # line 3

          with tf.GradientTape() as ftape:
            ftape.watch(x2)
            f_x2 = self._f(x2, training=training)
          x1 = z1 - f_x2  # pylint: disable=unused-variable # line 4

          # Compute gradients
          g_grads_combined = gtape.gradient(
              g_z1, [z1] + self._g.trainable_variables, output_gradients=dy2)
          dz1 = dy1 + g_grads_combined[0]  # line 5
          dwg = g_grads_combined[1:]  # line 9

          f_grads_combined = ftape.gradient(
              f_x2, [x2] + self._f.trainable_variables, output_gradients=dz1)
          dx2 = dy2 + f_grads_combined[0]  # line 6
          dwf = f_grads_combined[1:]  # line 8
          dx1 = dz1  # line 7

          # Pack the input and variable gradients.
          dx = tf.concat([dx1, dx2], axis=self._axis)
          grad_vars = dwf + dwg
          # Reorder gradients (trainable_variables to variables kwarg order)
          grad_vars = [grad_vars[i] for i in self_to_var_index]

        # Restore batch normalization moving mean/variance for correctness.
        self._load_ckpt_non_trainable_vars()

        return dx, grad_vars  # grad_fn end

      return y, grad_fn  # reversible end

    activations = reversible(inputs)
    return activations
@tf.keras.utils.register_keras_serializable(package='Vision')
class DepthwiseSeparableConvBlock(tf.keras.layers.Layer):
  """Creates a depthwise separable convolution block with batch normalization."""

  def __init__(
      self,
      filters: int,
      kernel_size: int = 3,
      strides: int = 1,
      regularize_depthwise=False,
      activation: Text = 'relu6',
      kernel_initializer: Text = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      dilation_rate: int = 1,
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      **kwargs):
    """Initializes a depthwise separable convolution block with batch norm.

    Args:
      filters: An `int` number of filters for the pointwise (1x1) convolution
        that follows the depthwise convolution.
      kernel_size: An `int` that specifies the height and width of the 2D
        depthwise convolution window.
      strides: An `int` of block stride. If greater than 1, this block will
        ultimately downsample the input.
      regularize_depthwise: A `bool`. If True, apply regularization on
        depthwise.
      activation: A `str` name of the activation function.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      dilation_rate: An `int` or tuple/list of 2 `int`, specifying the dilation
        rate to use for dilated convolution. Can be a single integer to specify
        the same value for all spatial dimensions.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(DepthwiseSeparableConvBlock, self).__init__(**kwargs)
    self._filters = filters
    self._kernel_size = kernel_size
    self._strides = strides
    self._activation = activation
    self._regularize_depthwise = regularize_depthwise
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._dilation_rate = dilation_rate
    self._use_sync_bn = use_sync_bn
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon

    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation_fn = tf_utils.get_activation(activation)
    # Regularize the depthwise kernel only when explicitly requested.
    if regularize_depthwise:
      self._depthsize_regularizer = kernel_regularizer
    else:
      self._depthsize_regularizer = None

  def get_config(self):
    """Returns the constructor arguments of this block.

    Fix: `kernel_size` and `dilation_rate` were previously omitted from the
    config, so serializing and re-creating a layer with non-default values
    silently reverted them to the defaults.
    """
    config = {
        'filters': self._filters,
        'kernel_size': self._kernel_size,
        'strides': self._strides,
        'regularize_depthwise': self._regularize_depthwise,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'dilation_rate': self._dilation_rate,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    base_config = super(DepthwiseSeparableConvBlock, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def build(self, input_shape):
    """Creates the depthwise conv, pointwise conv, and their BN layers."""
    self._dwconv0 = tf.keras.layers.DepthwiseConv2D(
        kernel_size=self._kernel_size,
        strides=self._strides,
        padding='same',
        depth_multiplier=1,
        dilation_rate=self._dilation_rate,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._depthsize_regularizer,
        use_bias=False)
    self._norm0 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    self._conv1 = tf.keras.layers.Conv2D(
        filters=self._filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer)
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    super(DepthwiseSeparableConvBlock, self).build(input_shape)

  def call(self, inputs, training=None):
    """Runs the forward pass: depthwise conv-BN-act, then pointwise conv-BN-act."""
    x = self._dwconv0(inputs)
    x = self._norm0(x)
    x = self._activation_fn(x)

    x = self._conv1(x)
    x = self._norm1(x)
    return self._activation_fn(x)
@tf.keras.utils.register_keras_serializable(package='Vision')
class TuckerConvBlock(tf.keras.layers.Layer):
  """A Tucker convolution block (generalized bottleneck).

  The block compresses the input with a 1x1 convolution, applies a KxK spatial
  convolution on the compressed features, and projects back to `out_filters`
  with a final 1x1 convolution. An identity shortcut is added when the
  input/output shapes are guaranteed to match.
  """

  def __init__(self,
               in_filters,
               out_filters,
               input_compression_ratio,
               output_compression_ratio,
               strides,
               kernel_size=3,
               stochastic_depth_drop_rate=None,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               divisible_by=1,
               use_residual=True,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """Initializes a Tucker convolution block with BN after convolutions.

    Args:
      in_filters: An `int` number of filters of the input tensor.
      out_filters: An `int` number of filters of the output tensor.
      input_compression_ratio: A `float` of compression ratio for input
        filters.
      output_compression_ratio: A `float` of compression ratio for output
        filters.
      strides: An `int` block stride. If greater than 1, this block will
        ultimately downsample the input.
      kernel_size: An `int` kernel_size of the spatial conv layer.
      stochastic_depth_drop_rate: A `float` or None. If not None, drop rate for
        the stochastic depth layer.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      divisible_by: An `int` that ensures all inner dimensions are divisible by
        this number.
      use_residual: A `bool` of whether to include residual connection between
        input and output.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._in_filters = in_filters
    self._out_filters = out_filters
    self._input_compression_ratio = input_compression_ratio
    self._output_compression_ratio = output_compression_ratio
    self._strides = strides
    self._kernel_size = kernel_size
    self._divisible_by = divisible_by
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._use_sync_bn = use_sync_bn
    self._use_residual = use_residual
    self._activation = activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer

    # Select the batch-norm implementation and the channel axis up front.
    self._norm = (
        tf.keras.layers.experimental.SyncBatchNormalization
        if use_sync_bn else tf.keras.layers.BatchNormalization)
    self._bn_axis = (
        -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1)

  def build(self, input_shape):
    """Creates the compression, spatial, and projection conv/BN stages."""
    norm_kwargs = dict(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)
    conv_kwargs = dict(
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)

    # 1x1 input-compression stage.
    input_compressed_filters = nn_layers.make_divisible(
        value=self._in_filters * self._input_compression_ratio,
        divisor=self._divisible_by,
        round_down_protect=False)
    self._conv0 = tf.keras.layers.Conv2D(
        filters=input_compressed_filters,
        kernel_size=1,
        strides=1,
        **conv_kwargs)
    self._norm0 = self._norm(**norm_kwargs)
    self._activation_layer0 = tf_utils.get_activation(
        self._activation, use_keras_layer=True)

    # KxK spatial stage operating on compressed features; carries the stride.
    output_compressed_filters = nn_layers.make_divisible(
        value=self._out_filters * self._output_compression_ratio,
        divisor=self._divisible_by,
        round_down_protect=False)
    self._conv1 = tf.keras.layers.Conv2D(
        filters=output_compressed_filters,
        kernel_size=self._kernel_size,
        strides=self._strides,
        **conv_kwargs)
    self._norm1 = self._norm(**norm_kwargs)
    self._activation_layer1 = tf_utils.get_activation(
        self._activation, use_keras_layer=True)

    # Last 1x1 conv; no activation afterwards (linear projection).
    self._conv2 = tf.keras.layers.Conv2D(
        filters=self._out_filters,
        kernel_size=1,
        strides=1,
        **conv_kwargs)
    self._norm2 = self._norm(**norm_kwargs)

    self._stochastic_depth = (
        nn_layers.StochasticDepth(self._stochastic_depth_drop_rate)
        if self._stochastic_depth_drop_rate else None)
    self._add = tf.keras.layers.Add()

    super().build(input_shape)

  def get_config(self):
    """Returns the constructor arguments of this block."""
    config = {
        'in_filters': self._in_filters,
        'out_filters': self._out_filters,
        'input_compression_ratio': self._input_compression_ratio,
        'output_compression_ratio': self._output_compression_ratio,
        'strides': self._strides,
        'kernel_size': self._kernel_size,
        'divisible_by': self._divisible_by,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'use_residual': self._use_residual,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    return {**super().get_config(), **config}

  def call(self, inputs, training=None):
    """Runs the forward pass of the Tucker block."""
    residual = inputs

    net = self._activation_layer0(self._norm0(self._conv0(inputs)))
    net = self._activation_layer1(self._norm1(self._conv1(net)))
    net = self._norm2(self._conv2(net))

    # Identity shortcut only when output shape is guaranteed to match input.
    if (self._use_residual and self._in_filters == self._out_filters
        and self._strides == 1):
      if self._stochastic_depth:
        net = self._stochastic_depth(net, training=training)
      net = self._add([net, residual])

    return net
official/vision/modeling/layers/nn_blocks_3d.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common building blocks for 3D networks."""
# Import libraries
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.vision.modeling.layers
import
nn_layers
@tf.keras.utils.register_keras_serializable(package='Vision')
class SelfGating(tf.keras.layers.Layer):
  """Feature gating as used in S3D-G.

  This implements the S3D-G network from:
  Saining Xie, Chen Sun, Jonathan Huang, Zhuowen Tu, Kevin Murphy.
  Rethinking Spatiotemporal Feature Learning: Speed-Accuracy Trade-offs in
  Video Classification.
  (https://arxiv.org/pdf/1712.04851.pdf)
  """

  def __init__(self, filters, **kwargs):
    """Initializes a self-gating layer.

    Args:
      filters: An `int` number of filters for the convolutional layer.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    self._filters = filters

  def build(self, input_shape):
    """Creates the global pooling and the 1x1x1 gating convolution."""
    self._spatial_temporal_average = tf.keras.layers.GlobalAveragePooling3D()

    # No BN and activation after conv.
    self._transformer_w = tf.keras.layers.Conv3D(
        filters=self._filters,
        kernel_size=[1, 1, 1],
        use_bias=True,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            mean=0.0, stddev=0.01))

    super().build(input_shape)

  def call(self, inputs):
    """Gates `inputs` by sigmoid weights computed from its global average."""
    weights = self._spatial_temporal_average(inputs)
    # Restore the three collapsed spatio-temporal axes so the 1x1x1 conv and
    # the final broadcast multiply see a 5-D tensor.
    for axis in (1, 2, 3):
      weights = tf.expand_dims(weights, axis)
    weights = tf.nn.sigmoid(self._transformer_w(weights))
    return tf.math.multiply(weights, inputs)
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock3D(tf.keras.layers.Layer):
  """Creates a 3D bottleneck block."""

  def __init__(self,
               filters,
               temporal_kernel_size,
               temporal_strides,
               spatial_strides,
               stochastic_depth_drop_rate=0.0,
               se_ratio=None,
               use_self_gating=False,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               use_sync_bn=False,
               norm_momentum=0.99,
               norm_epsilon=0.001,
               **kwargs):
    """Initializes a 3D bottleneck block with BN after convolutions.

    Args:
      filters: An `int` number of filters for the first two convolutions. Note
        that the third and final convolution will use 4 times as many filters.
      temporal_kernel_size: An `int` of kernel size for the temporal
        convolutional layer.
      temporal_strides: An `int` of temporal stride for the temporal
        convolutional layer.
      spatial_strides: An `int` of spatial stride for the spatial convolutional
        layer.
      stochastic_depth_drop_rate: A `float` or None. If not None, drop rate for
        the stochastic depth layer.
      se_ratio: A `float` or None. Ratio of the Squeeze-and-Excitation layer.
      use_self_gating: A `bool` of whether to apply self-gating module or not.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(BottleneckBlock3D, self).__init__(**kwargs)

    self._filters = filters
    self._temporal_kernel_size = temporal_kernel_size
    self._spatial_strides = spatial_strides
    self._temporal_strides = temporal_strides
    self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
    self._use_self_gating = use_self_gating
    self._se_ratio = se_ratio
    self._use_sync_bn = use_sync_bn
    self._activation = activation
    self._kernel_initializer = kernel_initializer
    self._norm_momentum = norm_momentum
    self._norm_epsilon = norm_epsilon
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    # Store the norm layer class (not an instance); instances are created in
    # `build`, one per normalization site.
    if use_sync_bn:
      self._norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._norm = tf.keras.layers.BatchNormalization
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation_fn = tf_utils.get_activation(activation)

  def build(self, input_shape):
    # Shortcut branch: strided max-pool when channel counts already match,
    # otherwise a strided 1x1x1 projection conv (selected at call time).
    self._shortcut_maxpool = tf.keras.layers.MaxPool3D(
        pool_size=[1, 1, 1],
        strides=[
            self._temporal_strides, self._spatial_strides,
            self._spatial_strides
        ])

    self._shortcut_conv = tf.keras.layers.Conv3D(
        filters=4 * self._filters,
        kernel_size=1,
        strides=[
            self._temporal_strides, self._spatial_strides,
            self._spatial_strides
        ],
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm0 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    # Main branch, stage 1: temporal-only conv (k x 1 x 1), temporal stride.
    self._temporal_conv = tf.keras.layers.Conv3D(
        filters=self._filters,
        kernel_size=[self._temporal_kernel_size, 1, 1],
        strides=[self._temporal_strides, 1, 1],
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm1 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    # Main branch, stage 2: spatial-only conv (1 x 3 x 3), spatial stride.
    self._spatial_conv = tf.keras.layers.Conv3D(
        filters=self._filters,
        kernel_size=[1, 3, 3],
        strides=[1, self._spatial_strides, self._spatial_strides],
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm2 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    # Main branch, stage 3: 1x1x1 expansion back to 4 * filters channels.
    self._expand_conv = tf.keras.layers.Conv3D(
        filters=4 * self._filters,
        kernel_size=[1, 1, 1],
        strides=[1, 1, 1],
        padding='same',
        use_bias=False,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)
    self._norm3 = self._norm(
        axis=self._bn_axis,
        momentum=self._norm_momentum,
        epsilon=self._norm_epsilon)

    # Optional Squeeze-and-Excitation on the expanded (4 * filters) features;
    # only enabled for a valid ratio in (0, 1].
    if self._se_ratio and self._se_ratio > 0 and self._se_ratio <= 1:
      self._squeeze_excitation = nn_layers.SqueezeExcitation(
          in_filters=self._filters * 4,
          out_filters=self._filters * 4,
          se_ratio=self._se_ratio,
          use_3d_input=True,
          kernel_initializer=self._kernel_initializer,
          kernel_regularizer=self._kernel_regularizer,
          bias_regularizer=self._bias_regularizer)
    else:
      self._squeeze_excitation = None

    if self._stochastic_depth_drop_rate:
      self._stochastic_depth = nn_layers.StochasticDepth(
          self._stochastic_depth_drop_rate)
    else:
      self._stochastic_depth = None

    if self._use_self_gating:
      self._self_gating = SelfGating(filters=4 * self._filters)
    else:
      self._self_gating = None

    super(BottleneckBlock3D, self).build(input_shape)

  def get_config(self):
    """Returns the constructor kwargs needed to re-create this layer."""
    config = {
        'filters': self._filters,
        'temporal_kernel_size': self._temporal_kernel_size,
        'temporal_strides': self._temporal_strides,
        'spatial_strides': self._spatial_strides,
        'use_self_gating': self._use_self_gating,
        'se_ratio': self._se_ratio,
        'stochastic_depth_drop_rate': self._stochastic_depth_drop_rate,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'use_sync_bn': self._use_sync_bn,
        'norm_momentum': self._norm_momentum,
        'norm_epsilon': self._norm_epsilon
    }
    base_config = super(BottleneckBlock3D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs, training=None):
    # Choose the shortcut: identity when shape is preserved, max-pool when
    # only strides change, projection conv (+ BN) when channels change.
    in_filters = inputs.shape.as_list()[-1]
    if in_filters == 4 * self._filters:
      if self._temporal_strides == 1 and self._spatial_strides == 1:
        shortcut = inputs
      else:
        shortcut = self._shortcut_maxpool(inputs)
    else:
      shortcut = self._shortcut_conv(inputs)
      shortcut = self._norm0(shortcut)

    x = self._temporal_conv(inputs)
    x = self._norm1(x)
    x = self._activation_fn(x)

    x = self._spatial_conv(x)
    x = self._norm2(x)
    x = self._activation_fn(x)

    x = self._expand_conv(x)
    x = self._norm3(x)

    # Apply self-gating, SE, stochastic depth.
    if self._self_gating:
      x = self._self_gating(x)
    if self._squeeze_excitation:
      x = self._squeeze_excitation(x)
    if self._stochastic_depth:
      x = self._stochastic_depth(x, training=training)

    # Residual add, then the final activation.
    x = self._activation_fn(x + shortcut)
    return x
official/vision/modeling/layers/nn_blocks_3d_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for resnet."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.modeling.layers
import
nn_blocks_3d
class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
  """Shape tests for the 3D bottleneck block."""

  @parameterized.parameters(
      (nn_blocks_3d.BottleneckBlock3D, 1, 1, 2, True, 0.2, 0.1),
      (nn_blocks_3d.BottleneckBlock3D, 3, 2, 1, False, 0.0, 0.0),
  )
  def test_bottleneck_block_creation(self, block_fn, temporal_kernel_size,
                                     temporal_strides, spatial_strides,
                                     use_self_gating, se_ratio,
                                     stochastic_depth):
    """Checks the output shape of a BottleneckBlock3D for given strides."""
    temporal_size = 16
    spatial_size = 128
    filters = 256
    # Input already has 4 * filters channels, matching the block's output.
    inputs = tf.keras.Input(
        shape=(temporal_size, spatial_size, spatial_size, filters * 4),
        batch_size=1)
    block = block_fn(
        filters=filters,
        temporal_kernel_size=temporal_kernel_size,
        temporal_strides=temporal_strides,
        spatial_strides=spatial_strides,
        use_self_gating=use_self_gating,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth)

    features = block(inputs)

    # Temporal and spatial dims shrink by their strides; channels stay 4x.
    self.assertAllEqual([
        1, temporal_size // temporal_strides, spatial_size // spatial_strides,
        spatial_size // spatial_strides, filters * 4
    ], features.shape.as_list())
# Run the test suite when invoked directly.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/layers/nn_blocks_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for nn_blocks."""
from
typing
import
Any
,
Iterable
,
Tuple
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
tensorflow.python.distribute
import
combinations
from
tensorflow.python.distribute
import
strategy_combinations
from
official.vision.modeling.layers
import
nn_blocks
def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]:
  """Returns the combinations of end-to-end tests to run."""
  # Exercise each test under the default, Cloud TPU, and single-GPU
  # distribution strategies.
  strategies = [
      strategy_combinations.default_strategy,
      strategy_combinations.cloud_tpu_strategy,
      strategy_combinations.one_device_strategy_gpu,
  ]
  return combinations.combine(distribution=strategies)
class NNBlocksTest(parameterized.TestCase, tf.test.TestCase):
  """Output-shape tests for the 2D residual/bottleneck/mobile blocks."""

  @parameterized.parameters(
      (nn_blocks.ResidualBlock, 1, False, 0.0, None),
      (nn_blocks.ResidualBlock, 2, True, 0.2, 0.25),
  )
  def test_residual_block_creation(self, block_fn, strides, use_projection,
                                   stochastic_depth_drop_rate, se_ratio):
    """Checks ResidualBlock output shape for given strides/options."""
    input_size = 128
    filter_size = 256
    inputs = tf.keras.Input(
        shape=(input_size, input_size, filter_size), batch_size=1)
    block = block_fn(
        filter_size,
        strides,
        use_projection=use_projection,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate,
    )

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, filter_size],
        features.shape.as_list())

  @parameterized.parameters(
      (nn_blocks.BottleneckBlock, 1, False, 0.0, None),
      (nn_blocks.BottleneckBlock, 2, True, 0.2, 0.25),
  )
  def test_bottleneck_block_creation(self, block_fn, strides, use_projection,
                                     stochastic_depth_drop_rate, se_ratio):
    """Checks BottleneckBlock output shape (4x channel expansion)."""
    input_size = 128
    filter_size = 256
    inputs = tf.keras.Input(
        shape=(input_size, input_size, filter_size * 4), batch_size=1)
    block = block_fn(
        filter_size,
        strides,
        use_projection=use_projection,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate)

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, filter_size * 4],
        features.shape.as_list())

  @parameterized.parameters(
      (nn_blocks.InvertedBottleneckBlock, 1, 1, None, None),
      (nn_blocks.InvertedBottleneckBlock, 6, 1, None, None),
      (nn_blocks.InvertedBottleneckBlock, 1, 2, None, None),
      (nn_blocks.InvertedBottleneckBlock, 1, 1, 0.2, None),
      (nn_blocks.InvertedBottleneckBlock, 1, 1, None, 0.2),
  )
  def test_invertedbottleneck_block_creation(self, block_fn, expand_ratio,
                                             strides, se_ratio,
                                             stochastic_depth_drop_rate):
    """Checks InvertedBottleneckBlock output shape across option combos."""
    input_size = 128
    in_filters = 24
    out_filters = 40
    inputs = tf.keras.Input(
        shape=(input_size, input_size, in_filters), batch_size=1)
    block = block_fn(
        in_filters=in_filters,
        out_filters=out_filters,
        expand_ratio=expand_ratio,
        strides=strides,
        se_ratio=se_ratio,
        stochastic_depth_drop_rate=stochastic_depth_drop_rate)

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, out_filters],
        features.shape.as_list())

  @parameterized.parameters(
      (nn_blocks.TuckerConvBlock, 1, 0.25, 0.25),
      (nn_blocks.TuckerConvBlock, 2, 0.25, 0.25),
  )
  def test_tucker_conv_block(self, block_fn, strides, input_compression_ratio,
                             output_compression_ratio):
    """Checks TuckerConvBlock output shape for given compression ratios."""
    input_size = 128
    in_filters = 24
    out_filters = 24
    inputs = tf.keras.Input(
        shape=(input_size, input_size, in_filters), batch_size=1)
    block = block_fn(
        in_filters=in_filters,
        out_filters=out_filters,
        input_compression_ratio=input_compression_ratio,
        output_compression_ratio=output_compression_ratio,
        strides=strides)

    features = block(inputs)

    self.assertAllEqual(
        [1, input_size // strides, input_size // strides, out_filters],
        features.shape.as_list())
class ResidualInnerTest(parameterized.TestCase, tf.test.TestCase):
  """Shape test for ResidualInner under several distribution strategies."""

  @combinations.generate(distribution_strategy_combinations())
  def test_shape(self, distribution):
    """Checks ResidualInner downsamples spatially and sets channel count."""
    bsz, h, w, c = 8, 32, 32, 32
    filters = 64
    strides = 2

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      test_layer = nn_blocks.ResidualInner(filters, strides)

    output = test_layer(input_tensor)
    expected_output_shape = [bsz, h // strides, w // strides, filters]
    self.assertEqual(expected_output_shape, output.shape.as_list())
class BottleneckResidualInnerTest(parameterized.TestCase, tf.test.TestCase):
  """Shape test for BottleneckResidualInner (4x channel expansion)."""

  @combinations.generate(distribution_strategy_combinations())
  def test_shape(self, distribution):
    """Checks output is downsampled spatially with filters * 4 channels."""
    bsz, h, w, c = 8, 32, 32, 32
    filters = 64
    strides = 2

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      test_layer = nn_blocks.BottleneckResidualInner(filters, strides)

    output = test_layer(input_tensor)
    expected_output_shape = [bsz, h // strides, w // strides, filters * 4]
    self.assertEqual(expected_output_shape, output.shape.as_list())
class DepthwiseSeparableConvBlockTest(parameterized.TestCase, tf.test.TestCase):
  """Shape and config round-trip tests for DepthwiseSeparableConvBlock."""

  @combinations.generate(distribution_strategy_combinations())
  def test_shape(self, distribution):
    """Checks the block's output shape and that get_config re-creates it."""
    batch_size, height, width, num_channels = 8, 32, 32, 32
    num_filters = 64
    strides = 2

    input_tensor = tf.random.normal(
        shape=[batch_size, height, width, num_channels])
    with distribution.scope():
      block = nn_blocks.DepthwiseSeparableConvBlock(
          num_filters, strides=strides)
      # Round-trip through get_config to verify serializability.
      config_dict = block.get_config()
      recreate_block = nn_blocks.DepthwiseSeparableConvBlock(**config_dict)

    output_tensor = block(input_tensor)
    expected_output_shape = [
        batch_size, height // strides, width // strides, num_filters
    ]
    self.assertEqual(output_tensor.shape.as_list(), expected_output_shape)

    output_tensor = recreate_block(input_tensor)
    self.assertEqual(output_tensor.shape.as_list(), expected_output_shape)
class ReversibleLayerTest(parameterized.TestCase, tf.test.TestCase):
  """Forward-shape and gradient-correctness tests for ReversibleLayer."""

  @combinations.generate(distribution_strategy_combinations())
  def test_downsampling_non_reversible_step(self, distribution):
    """Checks shape and that an optimizer step runs when strides > 1."""
    bsz, h, w, c = 8, 32, 32, 32
    filters = 64
    strides = 2

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      f = nn_blocks.ResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=True)
      g = nn_blocks.ResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=True)
      test_layer = nn_blocks.ReversibleLayer(f, g)
      test_layer.build(input_tensor.shape)
      optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

    @tf.function
    def step_fn():
      with tf.GradientTape() as tape:
        output = test_layer(input_tensor, training=True)
      grads = tape.gradient(output, test_layer.trainable_variables)
      # Test applying gradients with optimizer works
      optimizer.apply_gradients(zip(grads, test_layer.trainable_variables))

      return output

    replica_output = distribution.run(step_fn)
    outputs = distribution.experimental_local_results(replica_output)

    # Assert forward pass shape
    expected_output_shape = [bsz, h // strides, w // strides, filters]
    for output in outputs:
      self.assertEqual(expected_output_shape, output.shape.as_list())

  @combinations.generate(distribution_strategy_combinations())
  def test_reversible_step(self, distribution):
    """Checks a training step updates variables and preserves shape."""
    # Reversible layers satisfy: (a) strides = 1 (b) in_filter = out_filter
    bsz, h, w, c = 8, 32, 32, 32
    filters = c
    strides = 1

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c])
    with distribution.scope():
      f = nn_blocks.ResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=False)
      g = nn_blocks.ResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=False)
      test_layer = nn_blocks.ReversibleLayer(f, g)
      test_layer(input_tensor, training=False)  # init weights
      optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

    @tf.function
    def step_fn():
      with tf.GradientTape() as tape:
        output = test_layer(input_tensor, training=True)
      grads = tape.gradient(output, test_layer.trainable_variables)
      # Test applying gradients with optimizer works
      optimizer.apply_gradients(zip(grads, test_layer.trainable_variables))

      return output

    @tf.function
    def fwd():
      test_layer(input_tensor)

    distribution.run(fwd)  # Initialize variables
    prev_variables = tf.identity_n(test_layer.trainable_variables)
    replica_output = distribution.run(step_fn)
    outputs = distribution.experimental_local_results(replica_output)

    # Assert variables values have changed values
    for v0, v1 in zip(prev_variables, test_layer.trainable_variables):
      self.assertNotAllEqual(v0, v1)

    # Assert forward pass shape
    expected_output_shape = [bsz, h // strides, w // strides, filters]
    for output in outputs:
      self.assertEqual(expected_output_shape, output.shape.as_list())

  @combinations.generate(distribution_strategy_combinations())
  def test_manual_gradients_correctness(self, distribution):
    """Compares manual (recomputed) gradients against autograd gradients."""
    bsz, h, w, c = 8, 32, 32, 32
    filters = c
    strides = 1

    input_tensor = tf.random.uniform(shape=[bsz, h, w, c * 4])  # bottleneck
    with distribution.scope():
      f_manual = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=False)
      g_manual = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=False)
      manual_grad_layer = nn_blocks.ReversibleLayer(f_manual, g_manual)
      manual_grad_layer(input_tensor, training=False)  # init weights

      f_auto = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=strides, batch_norm_first=False)
      g_auto = nn_blocks.BottleneckResidualInner(
          filters=filters // 2, strides=1, batch_norm_first=False)
      auto_grad_layer = nn_blocks.ReversibleLayer(
          f_auto, g_auto, manual_grads=False)
      auto_grad_layer(input_tensor)  # init weights
      # Clone all weights (tf.keras.layers.Layer has no .clone())
      auto_grad_layer._f.set_weights(manual_grad_layer._f.get_weights())
      auto_grad_layer._g.set_weights(manual_grad_layer._g.get_weights())

    @tf.function
    def manual_fn():
      with tf.GradientTape() as tape:
        output = manual_grad_layer(input_tensor, training=True)
      grads = tape.gradient(output, manual_grad_layer.trainable_variables)
      return grads

    @tf.function
    def auto_fn():
      with tf.GradientTape() as tape:
        output = auto_grad_layer(input_tensor, training=True)
      grads = tape.gradient(output, auto_grad_layer.trainable_variables)
      return grads

    manual_grads = distribution.run(manual_fn)
    auto_grads = distribution.run(auto_fn)

    # Assert gradients calculated manually are close to that from autograd
    for manual_grad, auto_grad in zip(manual_grads, auto_grads):
      self.assertAllClose(
          distribution.experimental_local_results(manual_grad),
          distribution.experimental_local_results(auto_grad),
          atol=5e-3,
          rtol=5e-3)

    # Verify that BN moving mean and variance is correct.
    for manual_var, auto_var in zip(manual_grad_layer.non_trainable_variables,
                                    auto_grad_layer.non_trainable_variables):
      self.assertAllClose(manual_var, auto_var)
# Run the test suite when invoked directly.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/layers/nn_layers.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common building blocks for neural networks."""
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Mapping
,
Optional
,
Tuple
,
Union
from
absl
import
logging
import
tensorflow
as
tf
import
tensorflow_addons
as
tfa
from
official.modeling
import
tf_utils
from
official.vision.ops
import
spatial_transform_ops
# Type annotations.
States
=
Dict
[
str
,
tf
.
Tensor
]
Activation
=
Union
[
str
,
Callable
]
def make_divisible(value: float,
                   divisor: int,
                   min_value: Optional[float] = None,
                   round_down_protect: bool = True,
                   ) -> int:
  """This is to ensure that all layers have channels that are divisible by 8.

  Args:
    value: A `float` of original value.
    divisor: An `int` of the divisor that need to be checked upon.
    min_value: A `float` of minimum value threshold.
    round_down_protect: A `bool` indicating whether round down more than 10%
      will be allowed.

  Returns:
    The adjusted value in `int` that is divisible against divisor.
  """
  floor = divisor if min_value is None else min_value
  # Round `value` to the nearest multiple of `divisor`, never below `floor`.
  candidate = max(floor, int(value + divisor / 2) // divisor * divisor)
  # Make sure that round down does not go down by more than 10%.
  if round_down_protect and candidate < 0.9 * value:
    candidate += divisor
  return int(candidate)
def round_filters(filters: int,
                  multiplier: float,
                  divisor: int = 8,
                  min_depth: Optional[int] = None,
                  round_down_protect: bool = True,
                  skip: bool = False) -> int:
  """Rounds number of filters based on width multiplier."""
  # A falsy multiplier (None or 0) or an explicit skip leaves the count as-is.
  if skip or not multiplier:
    return filters

  scaled = make_divisible(
      value=filters * multiplier,
      divisor=divisor,
      min_value=min_depth,
      round_down_protect=round_down_protect)
  logging.info('round_filter input=%s output=%s', filters, scaled)
  return int(scaled)
def get_padding_for_kernel_size(kernel_size):
  """Compute padding size given kernel size."""
  # Explicit (height, width) paddings for the supported kernel sizes.
  known_paddings = {7: (3, 3), 3: (1, 1)}
  if kernel_size not in known_paddings:
    raise ValueError(
        'Padding for kernel size {} not known.'.format(kernel_size))
  return known_paddings[kernel_size]
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitation(tf.keras.layers.Layer):
  """Creates a squeeze and excitation layer."""

  def __init__(self,
               in_filters,
               out_filters,
               se_ratio,
               divisible_by=1,
               use_3d_input=False,
               kernel_initializer='VarianceScaling',
               kernel_regularizer=None,
               bias_regularizer=None,
               activation='relu',
               gating_activation='sigmoid',
               round_down_protect=True,
               **kwargs):
    """Initializes a squeeze and excitation layer.

    Args:
      in_filters: An `int` number of filters of the input tensor.
      out_filters: An `int` number of filters of the output tensor.
      se_ratio: A `float` or None. If not None, se ratio for the squeeze and
        excitation layer.
      divisible_by: An `int` that ensures all inner dimensions are divisible by
        this number.
      use_3d_input: A `bool` of whether input is 2D or 3D image.
      kernel_initializer: A `str` of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2d.
        Default to None.
      activation: A `str` name of the activation function.
      gating_activation: A `str` name of the activation function for final
        gating function.
      round_down_protect: A `bool` of whether round down more than 10% will be
        allowed.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(SqueezeExcitation, self).__init__(**kwargs)

    self._in_filters = in_filters
    self._out_filters = out_filters
    self._se_ratio = se_ratio
    self._divisible_by = divisible_by
    self._round_down_protect = round_down_protect
    self._use_3d_input = use_3d_input
    self._activation = activation
    self._gating_activation = gating_activation
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._bias_regularizer = bias_regularizer
    # Axes to average over in `call` depend on data format and rank: the
    # spatial (and, for 3D input, temporal) axes, excluding batch and channel.
    if tf.keras.backend.image_data_format() == 'channels_last':
      if not use_3d_input:
        self._spatial_axis = [1, 2]
      else:
        self._spatial_axis = [1, 2, 3]
    else:
      if not use_3d_input:
        self._spatial_axis = [2, 3]
      else:
        self._spatial_axis = [2, 3, 4]
    self._activation_fn = tf_utils.get_activation(activation)
    self._gating_activation_fn = tf_utils.get_activation(gating_activation)

  def build(self, input_shape):
    # Bottleneck width of the excitation MLP, aligned to `divisible_by`.
    num_reduced_filters = make_divisible(
        max(1, int(self._in_filters * self._se_ratio)),
        divisor=self._divisible_by,
        round_down_protect=self._round_down_protect)

    self._se_reduce = tf.keras.layers.Conv2D(
        filters=num_reduced_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=True,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)

    self._se_expand = tf.keras.layers.Conv2D(
        filters=self._out_filters,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=True,
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer)

    super(SqueezeExcitation, self).build(input_shape)

  def get_config(self):
    """Returns the constructor kwargs needed to re-create this layer."""
    config = {
        'in_filters': self._in_filters,
        'out_filters': self._out_filters,
        'se_ratio': self._se_ratio,
        'divisible_by': self._divisible_by,
        'use_3d_input': self._use_3d_input,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'bias_regularizer': self._bias_regularizer,
        'activation': self._activation,
        'gating_activation': self._gating_activation,
        'round_down_protect': self._round_down_protect,
    }
    base_config = super(SqueezeExcitation, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    # Squeeze: global average over spatial (and temporal) axes, keeping
    # singleton dims so the gate broadcasts back over the input.
    x = tf.reduce_mean(inputs, self._spatial_axis, keepdims=True)
    # Excite: reduce -> activation -> expand -> gating activation.
    x = self._activation_fn(self._se_reduce(x))
    x = self._gating_activation_fn(self._se_expand(x))
    return x * inputs
def get_stochastic_depth_rate(init_rate, i, n):
  """Get drop connect rate for the ith block.

  The rate grows linearly with block depth: block i of n receives
  init_rate * i / n.

  Args:
    init_rate: A `float` of initial drop rate.
    i: An `int` of order of the current block.
    n: An `int` total number of blocks.

  Returns:
    Drop rate of the ith block, or None when init_rate is None.

  Raises:
    ValueError: If init_rate is outside [0, 1].
  """
  if init_rate is None:
    return None
  if init_rate < 0 or init_rate > 1:
    raise ValueError('Initial drop rate must be within 0 and 1.')
  return init_rate * float(i) / n
@tf.keras.utils.register_keras_serializable(package='Vision')
class StochasticDepth(tf.keras.layers.Layer):
  """Creates a stochastic depth layer.

  During training, randomly drops the entire residual branch for each sample
  (with probability `stochastic_depth_drop_rate`) and rescales the surviving
  samples by 1 / keep_prob so the expected value is unchanged. At inference
  time it is the identity.
  """

  def __init__(self, stochastic_depth_drop_rate, **kwargs):
    """Initializes a stochastic depth layer.

    Args:
      stochastic_depth_drop_rate: A `float` of drop rate.
      **kwargs: Additional keyword arguments to be passed.

    Returns:
      A output `tf.Tensor` of which should have the same shape as input.
    """
    super(StochasticDepth, self).__init__(**kwargs)
    self._drop_rate = stochastic_depth_drop_rate

  def get_config(self):
    """Returns the constructor kwargs needed to re-create this layer."""
    # Bug fix: the key must match the __init__ argument name
    # ('stochastic_depth_drop_rate', not 'drop_rate'); otherwise
    # from_config(get_config()) raises TypeError and serialized models
    # cannot be reloaded.
    config = {'stochastic_depth_drop_rate': self._drop_rate}
    base_config = super(StochasticDepth, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs, training=None):
    if training is None:
      training = tf.keras.backend.learning_phase()
    # Identity at inference time or when dropping is disabled.
    if not training or self._drop_rate is None or self._drop_rate == 0:
      return inputs

    keep_prob = 1.0 - self._drop_rate
    batch_size = tf.shape(inputs)[0]
    # Per-sample Bernoulli mask: floor(keep_prob + U[0, 1)) is 1 with
    # probability keep_prob, 0 otherwise; singleton trailing dims broadcast
    # the decision over the whole sample.
    random_tensor = keep_prob
    random_tensor += tf.random.uniform(
        [batch_size] + [1] * (inputs.shape.rank - 1), dtype=inputs.dtype)
    binary_tensor = tf.floor(random_tensor)
    # Rescale survivors so the expectation matches inference behavior.
    output = tf.math.divide(inputs, keep_prob) * binary_tensor
    return output
@tf.keras.utils.register_keras_serializable(package='Vision')
def pyramid_feature_fusion(inputs, target_level):
  """Fuses all feature maps in the feature pyramid at the target level.

  Every level other than `target_level` is bilinearly resized to the target
  level's spatial resolution before the maps are summed element-wise.

  Args:
    inputs: A dictionary containing the feature pyramid. The size of the input
      tensor needs to be fixed.
    target_level: An `int` of the target feature level for feature fusion.

  Returns:
    A `float` `tf.Tensor` of shape [batch_size, feature_height, feature_width,
    feature_channel].
  """
  # Convert keys to int.
  pyramid_feats = {int(k): v for k, v in inputs.items()}
  min_level = min(pyramid_feats.keys())
  max_level = max(pyramid_feats.keys())

  resampled_feats = []
  for level in range(min_level, max_level + 1):
    feat = pyramid_feats[level]
    if level == target_level:
      resampled_feats.append(feat)
      continue
    # Each level step away from the target doubles/halves the resolution.
    scale = 2**(level - target_level)
    height, width = feat.shape[1:3]
    target_size = [height * scale, width * scale]
    # Casts feat to float32 so the resize op can be run on TPU.
    resized = tf.image.resize(
        tf.cast(feat, tf.float32),
        size=target_size,
        method=tf.image.ResizeMethod.BILINEAR)
    # Casts it back to be compatible with the rest of the operations.
    resampled_feats.append(tf.cast(resized, feat.dtype))

  return tf.math.add_n(resampled_feats)
class PanopticFPNFusion(tf.keras.Model):
  """Creates a Panoptic FPN feature Fusion layer.

  This implements feature fusion for semantic segmentation head from the paper:
  Alexander Kirillov, Ross Girshick, Kaiming He and Piotr Dollar.
  Panoptic Feature Pyramid Networks.
  (https://arxiv.org/pdf/1901.02446.pdf)
  """

  def __init__(
      self,
      min_level: int = 2,
      max_level: int = 5,
      target_level: int = 2,
      num_filters: int = 128,
      num_fpn_filters: int = 256,
      activation: str = 'relu',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes panoptic FPN feature fusion layer.

    Args:
      min_level: An `int` of minimum level to use in feature fusion.
      max_level: An `int` of maximum level to use in feature fusion.
      target_level: An `int` of the target feature level for feature fusion.
      num_filters: An `int` number of filters in conv2d layers.
      num_fpn_filters: An `int` number of filters in the FPN outputs
      activation: A `str` name of the activation function.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.

    Returns:
      A `float` `tf.Tensor` of shape [batch_size, feature_height, feature_width,
      feature_channel].

    Raises:
      ValueError: If `target_level` is greater than `max_level`.
    """
    if target_level > max_level:
      raise ValueError('target_level should be less than max_level')

    # Config is captured before the functional-model construction below;
    # `get_config` returns this dict verbatim for serialization.
    self._config_dict = {
        'min_level': min_level,
        'max_level': max_level,
        'target_level': target_level,
        'num_filters': num_filters,
        'num_fpn_filters': num_fpn_filters,
        'activation': activation,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    norm = tfa.layers.GroupNormalization
    conv2d = tf.keras.layers.Conv2D
    activation_fn = tf_utils.get_activation(activation)
    if tf.keras.backend.image_data_format() == 'channels_last':
      norm_axis = -1
    else:
      norm_axis = 1
    # Build the model functionally: symbolic inputs for each pyramid level.
    inputs = self._build_inputs(num_fpn_filters, min_level, max_level)

    upscaled_features = []
    for level in range(min_level, max_level + 1):
      # One conv (+ 2x upsample) stage per level of distance from the target,
      # so level l reaches the target resolution after (l - target_level)
      # doublings; the target level itself still gets one conv stage.
      num_conv_layers = max(1, level - target_level)
      x = inputs[str(level)]
      for i in range(num_conv_layers):
        x = conv2d(
            filters=num_filters,
            kernel_size=3,
            padding='same',
            kernel_initializer=tf.keras.initializers.VarianceScaling(),
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer)(x)
        x = norm(groups=32, axis=norm_axis)(x)
        x = activation_fn(x)
        if level != target_level:
          x = spatial_transform_ops.nearest_upsampling(x, scale=2)
      upscaled_features.append(x)

    # All levels are now at the target resolution; fuse by summation.
    fused_features = tf.math.add_n(upscaled_features)
    self._output_specs = {str(target_level): fused_features.get_shape()}

    super(PanopticFPNFusion, self).__init__(
        inputs=inputs, outputs=fused_features, **kwargs)

  def _build_inputs(self,
                    num_filters: int,
                    min_level: int,
                    max_level: int):
    """Creates symbolic `tf.keras.Input`s keyed by stringified level."""
    inputs = {}
    for level in range(min_level, max_level + 1):
      # Spatial dims are left dynamic; only the channel count is fixed.
      inputs[str(level)] = tf.keras.Input(shape=[None, None, num_filters])
    return inputs

  def get_config(self) -> Mapping[str, Any]:
    """Returns the config dictionary used for initialization."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Recreates the model from its `get_config` output."""
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
@tf.keras.utils.register_keras_serializable(package='Vision')
class Scale(tf.keras.layers.Layer):
  """Scales the input by a trainable scalar weight.

  This is useful for applying ReZero to layers, which improves convergence
  speed. This implements the paper:
  ReZero is All You Need: Fast Convergence at Large Depth.
  (https://arxiv.org/pdf/2003.04887.pdf).
  """

  def __init__(
      self,
      initializer: tf.keras.initializers.Initializer = 'ones',
      regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a scale layer.

    Args:
      initializer: A `str` of initializer for the scalar weight.
      regularizer: A `tf.keras.regularizers.Regularizer` for the scalar weight.
      **kwargs: Additional keyword arguments to be passed to this layer.
    """
    super(Scale, self).__init__(**kwargs)
    self._initializer = initializer
    self._regularizer = regularizer
    # A single trainable scalar; created eagerly since it has no shape
    # dependency on the input.
    self._scale = self.add_weight(
        name='scale',
        shape=[],
        dtype=self.dtype,
        initializer=self._initializer,
        regularizer=self._regularizer,
        trainable=True)

  def get_config(self):
    """Returns a dictionary containing the config used for initialization."""
    base_config = super(Scale, self).get_config()
    base_config.update({
        'initializer': self._initializer,
        'regularizer': self._regularizer,
    })
    return base_config

  def call(self, inputs):
    """Multiplies the inputs by the learned scalar, matching input dtype."""
    return tf.cast(self._scale, inputs.dtype) * inputs
@tf.keras.utils.register_keras_serializable(package='Vision')
class TemporalSoftmaxPool(tf.keras.layers.Layer):
  """Creates a network layer corresponding to temporal softmax pooling.

  This is useful for multi-class logits (used in e.g., Charades). Modified from
  AssembleNet Charades evaluation from:
  Michael S. Ryoo, AJ Piergiovanni, Mingxing Tan, Anelia Angelova.
  AssembleNet: Searching for Multi-Stream Neural Connectivity in Video
  Architectures.
  (https://arxiv.org/pdf/1905.13209.pdf).
  """

  def call(self, inputs):
    """Reweights the inputs by a temporal softmax over the frame axis."""
    assert inputs.shape.rank in (3, 4, 5)
    num_frames = tf.shape(inputs)[1]
    # Temperature-scale the logits by sqrt(num_frames) before the softmax.
    scaled = inputs / tf.sqrt(tf.cast(num_frames, inputs.dtype))
    weights = tf.nn.softmax(scaled, axis=1)
    return inputs * weights
@tf.keras.utils.register_keras_serializable(package='Vision')
class PositionalEncoding(tf.keras.layers.Layer):
  """Creates a network layer that adds a sinusoidal positional encoding.

  Positional encoding is incremented across frames, and is added to the input.
  The positional encoding is first weighted at 0 so that the network can choose
  to ignore it. This implements:
  Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
  Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin.
  Attention Is All You Need.
  (https://arxiv.org/pdf/1706.03762.pdf).
  """

  def __init__(self,
               initializer: tf.keras.initializers.Initializer = 'zeros',
               cache_encoding: bool = False,
               state_prefix: Optional[str] = None,
               **kwargs):
    """Initializes positional encoding.

    Args:
      initializer: A `str` of initializer for weighting the positional encoding.
      cache_encoding: A `bool`. If True, cache the positional encoding tensor
        after calling build. Otherwise, rebuild the tensor for every call.
        Setting this to False can be useful when we want to input a variable
        number of frames, so the positional encoding tensor can change shape.
      state_prefix: a prefix string to identify states.
      **kwargs: Additional keyword arguments to be passed to this layer.
    """
    super(PositionalEncoding, self).__init__(**kwargs)
    self._initializer = initializer
    self._cache_encoding = cache_encoding
    # Populated in `build` only when `cache_encoding` is True.
    self._pos_encoding = None
    # ReZero scale: initialized (by default) to zero so the network starts
    # out ignoring the positional encoding.
    self._rezero = Scale(initializer=initializer, name='rezero')
    state_prefix = state_prefix if state_prefix is not None else ''
    self._state_prefix = state_prefix
    # Key used in the `states` dict to track frames seen so far in streaming.
    self._frame_count_name = f'{state_prefix}_pos_enc_frame_count'

  def get_config(self):
    """Returns a dictionary containing the config used for initialization."""
    config = {
        'initializer': self._initializer,
        'cache_encoding': self._cache_encoding,
        'state_prefix': self._state_prefix,
    }
    base_config = super(PositionalEncoding, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def _positional_encoding(self,
                           num_positions: Union[int, tf.Tensor],
                           hidden_size: Union[int, tf.Tensor],
                           start_position: Union[int, tf.Tensor] = 0,
                           dtype: str = 'float32') -> tf.Tensor:
    """Creates a sequence of sinusoidal positional encoding vectors.

    Args:
      num_positions: the total number of positions (frames).
      hidden_size: the number of channels used for the hidden vectors.
      start_position: the start position.
      dtype: the dtype of the output tensor.

    Returns:
      The positional encoding tensor with shape [num_positions, hidden_size].
    """
    # A rank-1 state tensor holds a single scalar count; unwrap it.
    if (isinstance(start_position, tf.Tensor) and
        start_position.shape.rank == 1):
      start_position = start_position[0]

    # Calling `tf.range` with `dtype=tf.bfloat16` results in an error,
    # so we cast afterward.
    positions = tf.range(start_position, start_position + num_positions)
    positions = tf.cast(positions, dtype)[:, tf.newaxis]
    idx = tf.range(hidden_size)[tf.newaxis, :]

    # Standard transformer frequencies: 1 / 10000^(2*(i//2)/hidden_size).
    power = tf.cast(2 * (idx // 2), dtype)
    power /= tf.cast(hidden_size, dtype)
    angles = 1. / tf.math.pow(10_000., power)
    radians = positions * angles

    # Even channels get sine, odd channels get cosine; concatenated along
    # the channel axis.
    sin = tf.math.sin(radians[:, 0::2])
    cos = tf.math.cos(radians[:, 1::2])
    pos_encoding = tf.concat([sin, cos], axis=-1)

    return pos_encoding

  def _get_pos_encoding(self,
                        input_shape: tf.Tensor,
                        frame_count: int = 0) -> tf.Tensor:
    """Calculates the positional encoding from the input shape.

    Args:
      input_shape: the shape of the input.
      frame_count: a count of frames that indicates the index of the first
        frame.

    Returns:
      The positional encoding tensor with shape [num_positions, hidden_size].
    """
    # assumes 5D video input [batch, frames, height, width, channels] —
    # encoding is broadcast over batch and spatial dims.
    frames = input_shape[1]
    channels = input_shape[-1]
    pos_encoding = self._positional_encoding(
        frames, channels, start_position=frame_count, dtype=self.dtype)
    pos_encoding = tf.reshape(pos_encoding, [1, frames, 1, 1, channels])
    return pos_encoding

  def build(self, input_shape):
    """Builds the layer with the given input shape.

    Args:
      input_shape: The input shape.

    Raises:
      ValueError: If using 'channels_first' data format.
    """
    if tf.keras.backend.image_data_format() == 'channels_first':
      raise ValueError('"channels_first" mode is unsupported.')

    if self._cache_encoding:
      # Fixed input shape: compute the encoding once and reuse it in `call`.
      self._pos_encoding = self._get_pos_encoding(input_shape)

    super(PositionalEncoding, self).build(input_shape)

  def call(self,
           inputs: tf.Tensor,
           states: Optional[States] = None,
           output_states: bool = True,
           ) -> Union[tf.Tensor, Tuple[tf.Tensor, States]]:
    """Calls the layer with the given inputs.

    Args:
      inputs: An input `tf.Tensor`.
      states: A `dict` of states such that, if any of the keys match for this
        layer, will overwrite the contents of the buffer(s). Expected keys
        include `state_prefix + '_pos_enc_frame_count'`.
      output_states: A `bool`. If True, returns the output tensor and output
        states. Returns just the output tensor otherwise.

    Returns:
      An output `tf.Tensor` (and optionally the states if `output_states=True`).

    Raises:
      ValueError: If using 'channels_first' data format.
    """
    # Copy to avoid mutating the caller's dict.
    states = dict(states) if states is not None else {}

    # Keep a count of frames encountered across input iterations in
    # num_frames to be able to accurately update the positional encoding.
    num_frames = tf.shape(inputs)[1]
    frame_count = tf.cast(states.get(self._frame_count_name, [0]), tf.int32)
    states[self._frame_count_name] = frame_count + num_frames

    if self._cache_encoding:
      pos_encoding = self._pos_encoding
    else:
      # Rebuild per call so variable frame counts and streaming offsets
      # (`frame_count`) are handled.
      pos_encoding = self._get_pos_encoding(
          tf.shape(inputs), frame_count=frame_count)
    pos_encoding = tf.cast(pos_encoding, inputs.dtype)
    # ReZero gate: scaled by a trainable scalar initialized at zero.
    pos_encoding = self._rezero(pos_encoding)
    outputs = inputs + pos_encoding

    return (outputs, states) if output_states else outputs
@tf.keras.utils.register_keras_serializable(package='Vision')
class GlobalAveragePool3D(tf.keras.layers.Layer):
  """Creates a global average pooling layer with causal mode.

  Implements causal mode, which runs a cumulative sum (with `tf.cumsum`) across
  frames in the time dimension, allowing the use of a stream buffer. Sums any
  valid input state with the current input to allow state to accumulate over
  several iterations.
  """

  def __init__(self,
               keepdims: bool = False,
               causal: bool = False,
               state_prefix: Optional[str] = None,
               **kwargs):
    """Initializes a global average pool layer.

    Args:
      keepdims: A `bool`. If True, keep the averaged dimensions.
      causal: A `bool` of whether to run in causal mode with a cumulative sum
        across frames.
      state_prefix: a prefix string to identify states.
      **kwargs: Additional keyword arguments to be passed to this layer.
    """
    super(GlobalAveragePool3D, self).__init__(**kwargs)
    self._keepdims = keepdims
    self._causal = causal
    state_prefix = state_prefix if state_prefix is not None else ''
    self._state_prefix = state_prefix
    # Keys into the `states` dict: running sum buffer and frame counter.
    self._state_name = f'{state_prefix}_pool_buffer'
    self._frame_count_name = f'{state_prefix}_pool_frame_count'

  def get_config(self):
    """Returns a dictionary containing the config used for initialization."""
    config = {
        'keepdims': self._keepdims,
        'causal': self._causal,
        'state_prefix': self._state_prefix,
    }
    base_config = super(GlobalAveragePool3D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self,
           inputs: tf.Tensor,
           states: Optional[States] = None,
           output_states: bool = True
           ) -> Union[tf.Tensor, Tuple[tf.Tensor, States]]:
    """Calls the layer with the given inputs.

    Args:
      inputs: An input `tf.Tensor`.
      states: A `dict` of states such that, if any of the keys match for this
        layer, will overwrite the contents of the buffer(s).
        Expected keys include `state_prefix + '__pool_buffer'` and
        `state_prefix + '__pool_frame_count'`.
      output_states: A `bool`. If True, returns the output tensor and output
        states. Returns just the output tensor otherwise.

    Returns:
      An output `tf.Tensor` (and optionally the states if `output_states=True`).
      If `causal=True`, the output tensor will have shape
      `[batch_size, num_frames, 1, 1, channels]` if `keepdims=True`. We keep
      the frame dimension in this case to simulate a cumulative global average
      as if we are inputting one frame at a time. If `causal=False`, the output
      is equivalent to `tf.keras.layers.GlobalAveragePooling3D` with shape
      `[batch_size, 1, 1, 1, channels]` if `keepdims=True` (plus the optional
      buffer stored in `states`).

    Raises:
      ValueError: If using 'channels_first' data format.
    """
    # Copy to avoid mutating the caller's dict.
    states = dict(states) if states is not None else {}

    if tf.keras.backend.image_data_format() == 'channels_first':
      raise ValueError('"channels_first" mode is unsupported.')

    # Shape: [batch_size, 1, 1, 1, channels]
    buffer = states.get(self._state_name, None)
    if buffer is None:
      # No previous state: start accumulation from zero.
      buffer = tf.zeros_like(inputs[:, :1, :1, :1], dtype=inputs.dtype)
      states[self._state_name] = buffer

    # Keep a count of frames encountered across input iterations in
    # num_frames to be able to accurately take a cumulative average across
    # all frames when running in streaming mode
    num_frames = tf.shape(inputs)[1]
    frame_count = states.get(self._frame_count_name, tf.constant([0]))
    frame_count = tf.cast(frame_count, tf.int32)
    states[self._frame_count_name] = frame_count + num_frames

    if self._causal:
      # Take a mean of spatial dimensions to make computation more efficient.
      x = tf.reduce_mean(inputs, axis=[2, 3], keepdims=True)
      x = tf.cumsum(x, axis=1)
      # Carry over the running sum from previous iterations.
      x = x + buffer

      # The last frame will be the value of the next state
      # Shape: [batch_size, 1, 1, 1, channels]
      states[self._state_name] = x[:, -1:]

      # In causal mode, the divisor increments by 1 for every frame to
      # calculate cumulative averages instead of one global average
      mean_divisors = tf.range(num_frames) + frame_count + 1
      mean_divisors = tf.reshape(mean_divisors, [1, num_frames, 1, 1, 1])
      mean_divisors = tf.cast(mean_divisors, x.dtype)

      # Shape: [batch_size, num_frames, 1, 1, channels]
      x = x / mean_divisors
    else:
      # In non-causal mode, we (optionally) sum across frames to take a
      # cumulative average across input iterations rather than individual
      # frames. If no buffer state is passed, this essentially becomes
      # regular global average pooling.
      # Shape: [batch_size, 1, 1, 1, channels]
      x = tf.reduce_sum(inputs, axis=(1, 2, 3), keepdims=True)
      # Divide out the spatial extent first; the temporal divisor is applied
      # after merging with the buffered sum below.
      x = x / tf.cast(tf.shape(inputs)[2] * tf.shape(inputs)[3], x.dtype)
      x = x + buffer

      # Shape: [batch_size, 1, 1, 1, channels]
      states[self._state_name] = x

      x = x / tf.cast(frame_count + num_frames, x.dtype)

    if not self._keepdims:
      x = tf.squeeze(x, axis=(1, 2, 3))

    return (x, states) if output_states else x
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpatialAveragePool3D(tf.keras.layers.Layer):
  """Creates a global average pooling layer pooling across spatial dimentions."""

  def __init__(self, keepdims: bool = False, **kwargs):
    """Initializes a global average pool layer.

    Args:
      keepdims: A `bool`. If True, keep the averaged dimensions.
      **kwargs: Additional keyword arguments to be passed to this layer.
    """
    super(SpatialAveragePool3D, self).__init__(**kwargs)
    self._keepdims = keepdims

  def get_config(self):
    """Returns a dictionary containing the config used for initialization."""
    base_config = super(SpatialAveragePool3D, self).get_config()
    base_config.update({'keepdims': self._keepdims})
    return base_config

  def build(self, input_shape):
    """Builds the layer with the given input shape."""
    # Only channels-last [batch, time, height, width, channels] is supported.
    if tf.keras.backend.image_data_format() == 'channels_first':
      raise ValueError('"channels_first" mode is unsupported.')
    super(SpatialAveragePool3D, self).build(input_shape)

  def call(self, inputs):
    """Averages over the spatial (height, width) axes of a 5D input."""
    if inputs.shape.rank != 5:
      raise ValueError(
          'Input should have rank {}, got {}'.format(5, inputs.shape.rank))
    return tf.reduce_mean(inputs, axis=(2, 3), keepdims=self._keepdims)
class CausalConvMixin:
  """Mixin class to implement CausalConv for `tf.keras.layers.Conv` layers."""

  @property
  def use_buffered_input(self) -> bool:
    # When True, the caller pre-pads the input (stream buffer), so the layer
    # applies 'valid' padding along time instead of padding itself.
    return self._use_buffered_input

  @use_buffered_input.setter
  def use_buffered_input(self, variable: bool):
    self._use_buffered_input = variable

  def _compute_buffered_causal_padding(self,
                                       inputs: tf.Tensor,
                                       use_buffered_input: bool = False,
                                       time_axis: int = 1,
                                       ) -> List[List[int]]:
    """Calculates padding for 'causal' option for conv layers.

    Args:
      inputs: An optional input `tf.Tensor` to be padded.
      use_buffered_input: A `bool`. If True, use 'valid' padding along the time
        dimension. This should be set when applying the stream buffer.
      time_axis: An `int` of the axis of the time dimension.

    Returns:
      A list of paddings for `tf.pad`.
    """
    # Spatial/temporal dims only (drop batch and channel); assumes
    # channels-last layout, enforced below.
    input_shape = tf.shape(inputs)[1:-1]

    if tf.keras.backend.image_data_format() == 'channels_first':
      raise ValueError('"channels_first" mode is unsupported.')

    # Effective kernel extent after dilation: k + (k - 1) * (d - 1).
    kernel_size_effective = [
        (self.kernel_size[i] +
         (self.kernel_size[i] - 1) * (self.dilation_rate[i] - 1))
        for i in range(self.rank)
    ]
    # Dim 0 is time: full causal padding of (effective kernel - 1).
    pad_total = [kernel_size_effective[0] - 1]
    for i in range(1, self.rank):
      # Remaining dims use 'same'-style padding that accounts for stride.
      overlap = (input_shape[i] - 1) % self.strides[i] + 1
      pad_total.append(tf.maximum(kernel_size_effective[i] - overlap, 0))
    # Spatial dims split their padding evenly between begin and end.
    pad_beg = [pad_total[i] // 2 for i in range(self.rank)]
    pad_end = [pad_total[i] - pad_beg[i] for i in range(self.rank)]
    padding = [[pad_beg[i], pad_end[i]] for i in range(self.rank)]
    # Batch and channel dims get no padding.
    padding = [[0, 0]] + padding + [[0, 0]]

    if use_buffered_input:
      # Stream buffer already supplies the leading frames: no time padding.
      padding[time_axis] = [0, 0]
    else:
      # Causal: shift all time padding to the front so no future frames leak.
      padding[time_axis] = [padding[time_axis][0] + padding[time_axis][1], 0]
    return padding

  def _causal_validate_init(self):
    """Validates the Conv layer initial configuration."""
    # Overriding this method is meant to circumvent unnecessary errors when
    # using causal padding.
    if (self.filters is not None and self.filters % self.groups != 0):
      raise ValueError(
          'The number of filters must be evenly divisible by the number of '
          'groups. Received: groups={}, filters={}'.format(
              self.groups, self.filters))
    if not all(self.kernel_size):
      raise ValueError('The argument `kernel_size` cannot contain 0(s). '
                       'Received: %s' % (self.kernel_size,))

  def _buffered_spatial_output_shape(self, spatial_output_shape: List[int]):
    """Computes the spatial output shape from the input shape."""
    # When buffer padding, use 'valid' padding across time. The output shape
    # across time should be the input shape minus any padding, assuming
    # the stride across time is 1.
    if self._use_buffered_input and spatial_output_shape[0] is not None:
      # Probe with a dummy tensor to reuse the padding computation above.
      padding = self._compute_buffered_causal_padding(
          tf.zeros([1] + spatial_output_shape + [1]), use_buffered_input=False)
      spatial_output_shape[0] -= sum(padding[1])
    return spatial_output_shape
@tf.keras.utils.register_keras_serializable(package='Vision')
class Conv2D(tf.keras.layers.Conv2D, CausalConvMixin):
  """Conv2D layer supporting CausalConv.

  Supports `padding='causal'` option (like in `tf.keras.layers.Conv1D`),
  which applies causal padding to the temporal dimension, and same padding in
  the spatial dimensions.
  """

  def __init__(self, *args, use_buffered_input=False, **kwargs):
    """Initializes conv2d.

    Args:
      *args: Arguments to be passed.
      use_buffered_input: A `bool`. If True, the input is expected to be padded
        beforehand. In effect, calling this layer will use 'valid' padding on
        the temporal dimension to simulate 'causal' padding.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(Conv2D, self).__init__(*args, **kwargs)
    self._use_buffered_input = use_buffered_input

  def get_config(self):
    """Returns a dictionary containing the config used for initialization."""
    base_config = super(Conv2D, self).get_config()
    base_config.update({'use_buffered_input': self._use_buffered_input})
    return base_config

  def _compute_causal_padding(self, inputs):
    """Computes causal padding dimensions for the given inputs."""
    # Delegate to the mixin, honoring the buffered-input mode.
    return self._compute_buffered_causal_padding(
        inputs, use_buffered_input=self._use_buffered_input)

  def _validate_init(self):
    """Validates the Conv layer initial configuration."""
    # Use the mixin's relaxed validation so 'causal' padding is accepted.
    self._causal_validate_init()

  def _spatial_output_shape(self, spatial_input_shape: List[int]):
    """Computes the spatial output shape from the input shape."""
    base_shape = super(Conv2D, self)._spatial_output_shape(spatial_input_shape)
    return self._buffered_spatial_output_shape(base_shape)
@tf.keras.utils.register_keras_serializable(package='Vision')
class DepthwiseConv2D(tf.keras.layers.DepthwiseConv2D, CausalConvMixin):
  """DepthwiseConv2D layer supporting CausalConv.

  Supports `padding='causal'` option (like in `tf.keras.layers.Conv1D`),
  which applies causal padding to the temporal dimension, and same padding in
  the spatial dimensions.
  """

  def __init__(self, *args, use_buffered_input=False, **kwargs):
    """Initializes depthwise conv2d.

    Args:
      *args: Arguments to be passed.
      use_buffered_input: A `bool`. If True, the input is expected to be padded
        beforehand. In effect, calling this layer will use 'valid' padding on
        the temporal dimension to simulate 'causal' padding.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(DepthwiseConv2D, self).__init__(*args, **kwargs)
    self._use_buffered_input = use_buffered_input

    # Causal padding is unsupported by default for DepthwiseConv2D,
    # so we resort to valid padding internally. However, we handle
    # causal padding as a special case with `self._is_causal`, which is
    # defined by the super class.
    if self.padding == 'causal':
      self.padding = 'valid'

  def get_config(self):
    """Returns a dictionary containing the config used for initialization."""
    config = {
        'use_buffered_input': self._use_buffered_input,
    }
    base_config = super(DepthwiseConv2D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    """Calls the layer with the given inputs."""
    # Because `self.padding` was rewritten to 'valid' in __init__, the causal
    # padding must be applied manually here before the conv runs.
    if self._is_causal:
      inputs = tf.pad(inputs, self._compute_causal_padding(inputs))
    return super(DepthwiseConv2D, self).call(inputs)

  def _compute_causal_padding(self, inputs):
    """Computes causal padding dimensions for the given inputs."""
    return self._compute_buffered_causal_padding(
        inputs, use_buffered_input=self._use_buffered_input)

  def _validate_init(self):
    """Validates the Conv layer initial configuration."""
    # Delegates to the mixin's relaxed validation for causal padding.
    self._causal_validate_init()

  def _spatial_output_shape(self, spatial_input_shape: List[int]):
    """Computes the spatial output shape from the input shape."""
    shape = super(DepthwiseConv2D, self)._spatial_output_shape(
        spatial_input_shape)
    return self._buffered_spatial_output_shape(shape)
@tf.keras.utils.register_keras_serializable(package='Vision')
class Conv3D(tf.keras.layers.Conv3D, CausalConvMixin):
  """Conv3D layer supporting CausalConv.

  Supports `padding='causal'` option (like in `tf.keras.layers.Conv1D`),
  which applies causal padding to the temporal dimension, and same padding in
  the spatial dimensions.
  """

  def __init__(self, *args, use_buffered_input=False, **kwargs):
    """Initializes conv3d.

    Args:
      *args: Arguments to be passed.
      use_buffered_input: A `bool`. If True, the input is expected to be padded
        beforehand. In effect, calling this layer will use 'valid' padding on
        the temporal dimension to simulate 'causal' padding.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(Conv3D, self).__init__(*args, **kwargs)
    self._use_buffered_input = use_buffered_input
    # Lazily-created compiled wrapper around the parent conv op; see `call`.
    self._conv_fn = None

  def get_config(self):
    """Returns a dictionary containing the config used for initialization."""
    config = {
        'use_buffered_input': self._use_buffered_input,
    }
    base_config = super(Conv3D, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    """Call the layer with the given inputs."""
    # Note: tf.nn.conv3d with depthwise kernels on CPU is currently only
    # supported when compiling with TF graph (XLA) using tf.function, so it
    # is compiled by default here (b/186463870).
    # The tf.function is created once and cached on the instance: rebuilding
    # it on every call would discard its trace cache and force a retrace
    # (and an XLA recompilation) per invocation.
    if self._conv_fn is None:
      self._conv_fn = tf.function(
          super(Conv3D, self).call, jit_compile=True)
    return self._conv_fn(inputs)

  def _compute_causal_padding(self, inputs):
    """Computes causal padding dimensions for the given inputs."""
    return self._compute_buffered_causal_padding(
        inputs, use_buffered_input=self._use_buffered_input)

  def _validate_init(self):
    """Validates the Conv layer initial configuration."""
    # Delegates to the mixin's relaxed validation for causal padding.
    self._causal_validate_init()

  def _spatial_output_shape(self, spatial_input_shape: List[int]):
    """Computes the spatial output shape from the input shape."""
    shape = super(Conv3D, self)._spatial_output_shape(spatial_input_shape)
    return self._buffered_spatial_output_shape(shape)
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpatialPyramidPooling(tf.keras.layers.Layer):
  """Implements the Atrous Spatial Pyramid Pooling.

  References:
    [Rethinking Atrous Convolution for Semantic Image Segmentation](
      https://arxiv.org/pdf/1706.05587.pdf)
    [Encoder-Decoder with Atrous Separable Convolution for Semantic Image
      Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
  """

  def __init__(
      self,
      output_channels: int,
      dilation_rates: List[int],
      pool_kernel_size: Optional[List[int]] = None,
      use_sync_bn: bool = False,
      batchnorm_momentum: float = 0.99,
      batchnorm_epsilon: float = 0.001,
      activation: str = 'relu',
      dropout: float = 0.5,
      kernel_initializer: str = 'GlorotUniform',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      interpolation: str = 'bilinear',
      use_depthwise_convolution: bool = False,
      **kwargs):
    """Initializes `SpatialPyramidPooling`.

    Args:
      output_channels: Number of channels produced by SpatialPyramidPooling.
      dilation_rates: A list of integers for parallel dilated conv.
      pool_kernel_size: A list of integers or None. If None, global average
        pooling is applied, otherwise an average pooling of pool_kernel_size
        is applied.
      use_sync_bn: A bool, whether or not to use sync batch normalization.
      batchnorm_momentum: A float for the momentum in BatchNorm. Defaults to
        0.99.
      batchnorm_epsilon: A float for the epsilon value in BatchNorm. Defaults
        to 0.001.
      activation: A `str` for type of activation to be used. Defaults to
        'relu'.
      dropout: A float for the dropout rate before output. Defaults to 0.5.
      kernel_initializer: Kernel initializer for conv layers. Defaults to
        `glorot_uniform`.
      kernel_regularizer: Kernel regularizer for conv layers. Defaults to
        None.
      interpolation: The interpolation method for upsampling. Defaults to
        `bilinear`.
      use_depthwise_convolution: Allows spatial pooling to be separable
        depthwise convolutions. [Encoder-Decoder with Atrous Separable
        Convolution for Semantic Image Segmentation](
        https://arxiv.org/pdf/1802.02611.pdf)
      **kwargs: Other keyword arguments for the layer.
    """
    super().__init__(**kwargs)

    self._output_channels = output_channels
    self._dilation_rates = dilation_rates
    self._use_sync_bn = use_sync_bn
    self._batchnorm_momentum = batchnorm_momentum
    self._batchnorm_epsilon = batchnorm_epsilon
    self._activation = activation
    self._dropout = dropout
    self._kernel_initializer = kernel_initializer
    self._kernel_regularizer = kernel_regularizer
    self._interpolation = interpolation
    self._pool_kernel_size = pool_kernel_size
    self._use_depthwise_convolution = use_depthwise_convolution
    self._activation_fn = tf_utils.get_activation(activation)
    if self._use_sync_bn:
      self._bn_op = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      self._bn_op = tf.keras.layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1

  def build(self, input_shape):
    height = input_shape[1]
    width = input_shape[2]
    channels = input_shape[3]

    self.aspp_layers = []

    # Branch 1: 1x1 projection of the input features.
    conv1 = tf.keras.layers.Conv2D(
        filters=self._output_channels,
        kernel_size=(1, 1),
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        use_bias=False)
    norm1 = self._bn_op(
        axis=self._bn_axis,
        momentum=self._batchnorm_momentum,
        epsilon=self._batchnorm_epsilon)

    self.aspp_layers.append([conv1, norm1])

    # Branches 2..N: one dilated 3x3 conv per dilation rate, optionally
    # factored into a depthwise 3x3 followed by a pointwise 1x1.
    for dilation_rate in self._dilation_rates:
      leading_layers = []
      kernel_size = (3, 3)
      if self._use_depthwise_convolution:
        leading_layers += [
            tf.keras.layers.DepthwiseConv2D(
                depth_multiplier=1,
                kernel_size=kernel_size,
                padding='same',
                depthwise_regularizer=self._kernel_regularizer,
                depthwise_initializer=self._kernel_initializer,
                dilation_rate=dilation_rate,
                use_bias=False)
        ]
        kernel_size = (1, 1)
      conv_dilation = leading_layers + [
          tf.keras.layers.Conv2D(
              filters=self._output_channels,
              kernel_size=kernel_size,
              padding='same',
              kernel_regularizer=self._kernel_regularizer,
              kernel_initializer=self._kernel_initializer,
              dilation_rate=dilation_rate,
              use_bias=False)
      ]
      norm_dilation = self._bn_op(
          axis=self._bn_axis,
          momentum=self._batchnorm_momentum,
          epsilon=self._batchnorm_epsilon)

      self.aspp_layers.append(conv_dilation + [norm_dilation])

    # Final branch: image-level (or pool_kernel_size) pooled features.
    if self._pool_kernel_size is None:
      pooling = [
          tf.keras.layers.GlobalAveragePooling2D(),
          tf.keras.layers.Reshape((1, 1, channels))
      ]
    else:
      pooling = [tf.keras.layers.AveragePooling2D(self._pool_kernel_size)]

    conv2 = tf.keras.layers.Conv2D(
        filters=self._output_channels,
        kernel_size=(1, 1),
        kernel_initializer=self._kernel_initializer,
        kernel_regularizer=self._kernel_regularizer,
        use_bias=False)
    norm2 = self._bn_op(
        axis=self._bn_axis,
        momentum=self._batchnorm_momentum,
        epsilon=self._batchnorm_epsilon)

    self.aspp_layers.append(pooling + [conv2, norm2])

    # Resizing runs in float32 regardless of the compute policy to avoid
    # precision issues during interpolation under mixed precision.
    self._resizing_layer = tf.keras.layers.Resizing(
        height, width, interpolation=self._interpolation, dtype=tf.float32)

    self._projection = [
        tf.keras.layers.Conv2D(
            filters=self._output_channels,
            kernel_size=(1, 1),
            kernel_initializer=self._kernel_initializer,
            kernel_regularizer=self._kernel_regularizer,
            use_bias=False),
        self._bn_op(
            axis=self._bn_axis,
            momentum=self._batchnorm_momentum,
            epsilon=self._batchnorm_epsilon)
    ]
    self._dropout_layer = tf.keras.layers.Dropout(rate=self._dropout)
    self._concat_layer = tf.keras.layers.Concatenate(axis=-1)

  def call(self,
           inputs: tf.Tensor,
           training: Optional[bool] = None) -> tf.Tensor:
    if training is None:
      training = tf.keras.backend.learning_phase()
    result = []
    for i, layers in enumerate(self.aspp_layers):
      x = inputs
      for layer in layers:
        # Apply layers sequentially.
        x = layer(x, training=training)
      x = self._activation_fn(x)

      # Apply resize layer to the end of the last set of layers.
      if i == len(self.aspp_layers) - 1:
        x = self._resizing_layer(x)

      result.append(tf.cast(x, inputs.dtype))
    x = self._concat_layer(result)
    for layer in self._projection:
      x = layer(x, training=training)
    x = self._activation_fn(x)
    return self._dropout_layer(x)

  def get_config(self):
    config = {
        'output_channels': self._output_channels,
        'dilation_rates': self._dilation_rates,
        'pool_kernel_size': self._pool_kernel_size,
        'use_sync_bn': self._use_sync_bn,
        'batchnorm_momentum': self._batchnorm_momentum,
        'batchnorm_epsilon': self._batchnorm_epsilon,
        'activation': self._activation,
        'dropout': self._dropout,
        'kernel_initializer': self._kernel_initializer,
        'kernel_regularizer': self._kernel_regularizer,
        'interpolation': self._interpolation,
        # Bug fix: this key was previously missing, so serializing and
        # restoring the layer silently dropped the depthwise-convolution
        # option.
        'use_depthwise_convolution': self._use_depthwise_convolution,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))
official/vision/modeling/layers/nn_layers_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for nn_layers."""
# Import libraries
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.vision.modeling.layers
import
nn_layers
class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
  # Unit tests for the custom layers in nn_layers. The streaming tests check
  # that processing a clip frame-by-frame (carrying `states` across calls)
  # matches processing the full clip at once.

  def test_scale(self):
    # Scale with a constant initializer of 10 should multiply inputs by 10.
    scale = nn_layers.Scale(initializer=tf.keras.initializers.constant(10.))
    output = scale(3.)
    self.assertAllEqual(output, 30.)

  def test_temporal_softmax_pool(self):
    # Softmax pooling over the temporal axis of a 1..4 ramp; expected values
    # are precomputed fixtures.
    inputs = tf.range(4, dtype=tf.float32) + 1.
    inputs = tf.reshape(inputs, [1, 4, 1, 1, 1])
    layer = nn_layers.TemporalSoftmaxPool()
    output = layer(inputs)
    self.assertAllClose(
        output,
        [[[[[0.10153633]]],
          [[[0.33481020]]],
          [[[0.82801306]]],
          [[[1.82021690]]]]])

  def test_positional_encoding(self):
    # Cached and uncached encodings must agree; expected values are
    # precomputed sinusoidal-encoding fixtures added to an all-ones input.
    pos_encoding = nn_layers.PositionalEncoding(
        initializer='ones', cache_encoding=False)
    pos_encoding_cached = nn_layers.PositionalEncoding(
        initializer='ones', cache_encoding=True)

    inputs = tf.ones([1, 4, 1, 1, 3])
    outputs, _ = pos_encoding(inputs)
    outputs_cached, _ = pos_encoding_cached(inputs)

    expected = tf.constant(
        [[[[[1.0000000, 1.0000000, 2.0000000]]],
          [[[1.8414710, 1.0021545, 1.5403023]]],
          [[[1.9092975, 1.0043088, 0.5838531]]],
          [[[1.1411200, 1.0064633, 0.0100075]]]]])

    self.assertEqual(outputs.shape, expected.shape)
    self.assertAllClose(outputs, expected)

    self.assertEqual(outputs.shape, outputs_cached.shape)
    self.assertAllClose(outputs, outputs_cached)

    # A longer clip should also be accepted (encoding is rebuilt, not cached
    # at the old length).
    inputs = tf.ones([1, 5, 1, 1, 3])
    _ = pos_encoding(inputs)

  def test_positional_encoding_bfloat16(self):
    # Same fixture as test_positional_encoding but with bfloat16 inputs.
    pos_encoding = nn_layers.PositionalEncoding(initializer='ones')

    inputs = tf.ones([1, 4, 1, 1, 3], dtype=tf.bfloat16)
    outputs, _ = pos_encoding(inputs)

    expected = tf.constant(
        [[[[[1.0000000, 1.0000000, 2.0000000]]],
          [[[1.8414710, 1.0021545, 1.5403023]]],
          [[[1.9092975, 1.0043088, 0.5838531]]],
          [[[1.1411200, 1.0064633, 0.0100075]]]]])

    self.assertEqual(outputs.shape, expected.shape)
    self.assertAllClose(outputs, expected)

  def test_global_average_pool_basic(self):
    # Pooling an all-ones tensor yields ones with squeezed spatial/temporal
    # dims kept (keepdims=True).
    pool = nn_layers.GlobalAveragePool3D(keepdims=True)

    inputs = tf.ones([1, 2, 3, 4, 1])
    outputs = pool(inputs, output_states=False)
    expected = tf.ones([1, 1, 1, 1, 1])

    self.assertEqual(outputs.shape, expected.shape)
    self.assertAllEqual(outputs, expected)

  def test_positional_encoding_stream(self):
    # Streaming the clip in 1, 2, or 4 chunks must match the full-clip
    # result exactly.
    pos_encoding = nn_layers.PositionalEncoding(
        initializer='ones', cache_encoding=False)

    inputs = tf.range(4, dtype=tf.float32) + 1.
    inputs = tf.reshape(inputs, [1, 4, 1, 1, 1])
    inputs = tf.tile(inputs, [1, 1, 1, 1, 3])
    expected, _ = pos_encoding(inputs)

    for num_splits in [1, 2, 4]:
      frames = tf.split(inputs, num_splits, axis=1)
      states = {}
      predicted = []
      for frame in frames:
        output, states = pos_encoding(frame, states=states)
        predicted.append(output)
      predicted = tf.concat(predicted, axis=1)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected)
      self.assertAllClose(predicted,
                          [[[[[1.0000000, 1.0000000, 2.0000000]]],
                            [[[2.8414710, 2.0021544, 2.5403023]]],
                            [[[3.9092975, 3.0043090, 2.5838532]]],
                            [[[4.1411200, 4.0064630, 3.0100074]]]]])

  def test_global_average_pool_keras(self):
    # With keepdims=False the custom pool should match Keras'
    # GlobalAveragePooling3D on random inputs.
    pool = nn_layers.GlobalAveragePool3D(keepdims=False)
    keras_pool = tf.keras.layers.GlobalAveragePooling3D()

    inputs = 10 * tf.random.normal([1, 2, 3, 4, 1])

    outputs = pool(inputs, output_states=False)
    keras_output = keras_pool(inputs)

    self.assertAllEqual(outputs.shape, keras_output.shape)
    self.assertAllClose(outputs, keras_output)

  def test_stream_global_average_pool(self):
    # Non-causal streaming: only the final streamed output must match the
    # full-clip average (mean of 1..4 = 2.5).
    gap = nn_layers.GlobalAveragePool3D(keepdims=True, causal=False)

    inputs = tf.range(4, dtype=tf.float32) + 1.
    inputs = tf.reshape(inputs, [1, 4, 1, 1, 1])
    inputs = tf.tile(inputs, [1, 1, 2, 2, 3])

    expected, _ = gap(inputs)

    for num_splits in [1, 2, 4]:
      frames = tf.split(inputs, num_splits, axis=1)
      states = {}
      predicted = None
      for frame in frames:
        predicted, states = gap(frame, states=states)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected)
      self.assertAllClose(predicted,
                          [[[[[2.5, 2.5, 2.5]]]]])

  def test_causal_stream_global_average_pool(self):
    # Causal streaming: each frame's output is the running mean of frames
    # seen so far (1.0, 1.5, 2.0, 2.5).
    gap = nn_layers.GlobalAveragePool3D(keepdims=True, causal=True)

    inputs = tf.range(4, dtype=tf.float32) + 1.
    inputs = tf.reshape(inputs, [1, 4, 1, 1, 1])
    inputs = tf.tile(inputs, [1, 1, 2, 2, 3])

    expected, _ = gap(inputs)

    for num_splits in [1, 2, 4]:
      frames = tf.split(inputs, num_splits, axis=1)
      states = {}
      predicted = []
      for frame in frames:
        x, states = gap(frame, states=states)
        predicted.append(x)
      predicted = tf.concat(predicted, axis=1)

      self.assertEqual(predicted.shape, expected.shape)
      self.assertAllClose(predicted, expected)
      self.assertAllClose(predicted,
                          [[[[[1.0, 1.0, 1.0]]],
                            [[[1.5, 1.5, 1.5]]],
                            [[[2.0, 2.0, 2.0]]],
                            [[[2.5, 2.5, 2.5]]]]])

  def test_spatial_average_pool(self):
    # Averages over the two spatial dims only, leaving the temporal dim.
    pool = nn_layers.SpatialAveragePool3D(keepdims=True)

    inputs = tf.range(64, dtype=tf.float32) + 1.
    inputs = tf.reshape(inputs, [1, 4, 4, 4, 1])

    output = pool(inputs)

    self.assertEqual(output.shape, [1, 4, 1, 1, 1])
    self.assertAllClose(
        output,
        [[[[[8.50]]],
          [[[24.5]]],
          [[[40.5]]],
          [[[56.5]]]]])

  def test_conv2d_causal(self):
    # Buffered causal conv on pre-padded input must equal unbuffered causal
    # conv on the raw input.
    conv2d = nn_layers.Conv2D(
        filters=3,
        kernel_size=(3, 3),
        strides=(1, 2),
        padding='causal',
        use_buffered_input=True,
        kernel_initializer='ones',
        use_bias=False,
    )

    inputs = tf.ones([1, 4, 2, 3])

    paddings = [[0, 0], [2, 0], [0, 0], [0, 0]]
    padded_inputs = tf.pad(inputs, paddings)
    predicted = conv2d(padded_inputs)

    expected = tf.constant(
        [[[[6.0, 6.0, 6.0]],
          [[12., 12., 12.]],
          [[18., 18., 18.]],
          [[18., 18., 18.]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

    conv2d.use_buffered_input = False
    predicted = conv2d(inputs)

    self.assertFalse(conv2d.use_buffered_input)
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_depthwise_conv2d_causal(self):
    # Same buffered-vs-unbuffered equivalence for the depthwise variant.
    conv2d = nn_layers.DepthwiseConv2D(
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='causal',
        use_buffered_input=True,
        depthwise_initializer='ones',
        use_bias=False,
    )

    inputs = tf.ones([1, 2, 2, 3])

    paddings = [[0, 0], [2, 0], [0, 0], [0, 0]]
    padded_inputs = tf.pad(inputs, paddings)
    predicted = conv2d(padded_inputs)

    expected = tf.constant(
        [[[[2., 2., 2.],
           [2., 2., 2.]],
          [[4., 4., 4.],
           [4., 4., 4.]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

    conv2d.use_buffered_input = False
    predicted = conv2d(inputs)

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_conv3d_causal(self):
    # 3D causal conv, buffered vs unbuffered paths.
    conv3d = nn_layers.Conv3D(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        padding='causal',
        use_buffered_input=True,
        kernel_initializer='ones',
        use_bias=False,
    )

    inputs = tf.ones([1, 2, 4, 4, 3])

    paddings = [[0, 0], [2, 0], [0, 0], [0, 0], [0, 0]]
    padded_inputs = tf.pad(inputs, paddings)
    predicted = conv3d(padded_inputs)

    expected = tf.constant(
        [[[[[27., 27., 27.],
            [18., 18., 18.]],
           [[18., 18., 18.],
            [12., 12., 12.]]],
          [[[54., 54., 54.],
            [36., 36., 36.]],
           [[36., 36., 36.],
            [24., 24., 24.]]]]])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

    conv3d.use_buffered_input = False
    predicted = conv3d(inputs)

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_depthwise_conv3d_causal(self):
    # groups=3 makes the 3D conv depthwise; also checks the spatial output
    # shape helper.
    conv3d = nn_layers.Conv3D(
        filters=3,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        padding='causal',
        use_buffered_input=True,
        kernel_initializer='ones',
        use_bias=False,
        groups=3,
    )

    inputs = tf.ones([1, 2, 4, 4, 3])

    paddings = [[0, 0], [2, 0], [0, 0], [0, 0], [0, 0]]
    padded_inputs = tf.pad(inputs, paddings)
    predicted = conv3d(padded_inputs)

    expected = tf.constant(
        [[[[[9.0, 9.0, 9.0],
            [6.0, 6.0, 6.0]],
           [[6.0, 6.0, 6.0],
            [4.0, 4.0, 4.0]]],
          [[[18.0, 18.0, 18.0],
            [12., 12., 12.]],
           [[12., 12., 12.],
            [8., 8., 8.]]]]])

    output_shape = conv3d._spatial_output_shape([4, 4, 4])

    self.assertAllClose(output_shape, [2, 2, 2])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

    conv3d.use_buffered_input = False
    predicted = conv3d(inputs)

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

  def test_conv3d_causal_padding_2d(self):
    """Test to ensure causal padding works like standard padding."""
    # With a temporal kernel of 1, causal padding degenerates to 'same'
    # padding in the spatial dims.
    conv3d = nn_layers.Conv3D(
        filters=1,
        kernel_size=(1, 3, 3),
        strides=(1, 2, 2),
        padding='causal',
        use_buffered_input=False,
        kernel_initializer='ones',
        use_bias=False,
    )

    keras_conv3d = tf.keras.layers.Conv3D(
        filters=1,
        kernel_size=(1, 3, 3),
        strides=(1, 2, 2),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
    )

    inputs = tf.ones([1, 1, 4, 4, 1])

    predicted = conv3d(inputs)
    expected = keras_conv3d(inputs)

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

    self.assertAllClose(predicted,
                        [[[[[9.],
                            [6.]],
                           [[6.],
                            [4.]]]]])

  def test_conv3d_causal_padding_1d(self):
    """Test to ensure causal padding works like standard padding."""
    # With a spatial kernel of 1, causal 3D conv matches a causal Conv1D on
    # the temporal axis.
    conv3d = nn_layers.Conv3D(
        filters=1,
        kernel_size=(3, 1, 1),
        strides=(2, 1, 1),
        padding='causal',
        use_buffered_input=False,
        kernel_initializer='ones',
        use_bias=False,
    )

    keras_conv1d = tf.keras.layers.Conv1D(
        filters=1,
        kernel_size=3,
        strides=2,
        padding='causal',
        kernel_initializer='ones',
        use_bias=False,
    )

    inputs = tf.ones([1, 4, 1, 1, 1])

    predicted = conv3d(inputs)
    expected = keras_conv1d(tf.squeeze(inputs, axis=[2, 3]))
    expected = tf.reshape(expected, [1, 2, 1, 1, 1])

    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)

    self.assertAllClose(predicted,
                        [[[[[1.]]],
                          [[[3.]]]]])

  @parameterized.parameters(
      (None, []),
      (None, [6, 12, 18]),
      ([32, 32], [6, 12, 18]),
  )
  def test_aspp(self, pool_kernel_size, dilation_rates):
    # ASPP must preserve spatial size and produce output_channels channels
    # with and without dilation branches / fixed pooling.
    inputs = tf.keras.Input(shape=(64, 64, 128), dtype=tf.float32)
    layer = nn_layers.SpatialPyramidPooling(
        output_channels=256,
        dilation_rates=dilation_rates,
        pool_kernel_size=pool_kernel_size)
    output = layer(inputs)
    self.assertAllEqual([None, 64, 64, 256], output.shape)
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
official/vision/modeling/layers/roi_aligner.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of ROI aligner."""
from typing import Mapping, Optional

import tensorflow as tf

from official.vision.ops import spatial_transform_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class MultilevelROIAligner(tf.keras.layers.Layer):
  """Performs ROIAlign for the second stage processing."""

  def __init__(self, crop_size: int = 7, sample_offset: float = 0.5,
               **kwargs):
    """Initializes a ROI aligner.

    Args:
      crop_size: An `int` of the output size of the cropped features.
      sample_offset: A `float` in [0, 1] of the subpixel sample offset.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = {
        'crop_size': crop_size,
        'sample_offset': sample_offset,
    }
    super(MultilevelROIAligner, self).__init__(**kwargs)

  def call(self,
           features: Mapping[str, tf.Tensor],
           boxes: tf.Tensor,
           training: Optional[bool] = None):
    # Bug fix: `training` defaults to None, so the annotation must be
    # Optional[bool] rather than bool.
    """Generates ROIs.

    Args:
      features: A dictionary with key as pyramid level and value as features.
        The features are in shape of
        [batch_size, height_l, width_l, num_filters].
      boxes: A 3-D `tf.Tensor` of shape [batch_size, num_boxes, 4]. Each row
        represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
      training: A `bool` of whether it is in training mode.

    Returns:
      A 5-D `tf.Tensor` representing feature crop of shape
      [batch_size, num_boxes, crop_size, crop_size, num_filters].
    """
    roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        features,
        boxes,
        output_size=self._config_dict['crop_size'],
        sample_offset=self._config_dict['sample_offset'])
    return roi_features

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
official/vision/modeling/layers/roi_aligner_test.py
deleted
100644 → 0
View file @
9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for roi_aligner.py."""
# Import libraries
import
tensorflow
as
tf
from
official.vision.modeling.layers
import
roi_aligner
class MultilevelROIAlignerTest(tf.test.TestCase):
  """Serialization tests for MultilevelROIAligner."""

  def test_serialize_deserialize(self):
    # The layer's config must reflect its constructor arguments, and a layer
    # rebuilt from that config must report the identical config.
    init_kwargs = dict(
        crop_size=7,
        sample_offset=0.5,
    )
    aligner = roi_aligner.MultilevelROIAligner(**init_kwargs)

    self.assertEqual(aligner.get_config(), dict(init_kwargs))

    restored = roi_aligner.MultilevelROIAligner.from_config(
        aligner.get_config())
    self.assertAllEqual(aligner.get_config(), restored.get_config())
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment