Commit 7785dec0 authored by Yeqing Li, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 425740068
parent 9c93f07c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for SpineNet."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.modeling.backbones import spinenet_mobile
class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(128, 0.6, 1, 0.0, 24),
(128, 0.65, 1, 0.2, 40),
(256, 1.0, 1, 0.2, 48),
)
def test_network_creation(self, input_size, filter_size_scale, block_repeats,
se_ratio, endpoints_num_filters):
"""Test creation of SpineNet models."""
min_level = 3
max_level = 7
tf.keras.backend.set_image_data_format('channels_last')
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size, input_size, 3])
model = spinenet_mobile.SpineNetMobile(
input_specs=input_specs,
min_level=min_level,
max_level=max_level,
endpoints_num_filters=endpoints_num_filters,
se_ratio=se_ratio,
block_repeats=block_repeats,
filter_size_scale=filter_size_scale,
init_stochastic_depth_rate=0.2,
)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = model(inputs)
for l in range(min_level, max_level + 1):
self.assertIn(str(l), endpoints.keys())
self.assertAllEqual(
[1, input_size / 2**l, input_size / 2**l, endpoints_num_filters],
endpoints[str(l)].shape.as_list())
def test_serialize_deserialize(self):
# Create a network object that sets all of its config options.
kwargs = dict(
min_level=3,
max_level=7,
endpoints_num_filters=256,
se_ratio=0.2,
expand_ratio=6,
block_repeats=1,
filter_size_scale=1.0,
init_stochastic_depth_rate=0.2,
use_sync_bn=False,
activation='relu',
norm_momentum=0.99,
norm_epsilon=0.001,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
use_keras_upsampling_2d=False,
)
network = spinenet_mobile.SpineNetMobile(**kwargs)
expected_config = dict(kwargs)
self.assertEqual(network.get_config(), expected_config)
# Create another network object from the first object's config.
new_network = spinenet_mobile.SpineNetMobile.from_config(
network.get_config())
# Validate that the config can be forced to JSON.
_ = new_network.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(network.get_config(), new_network.get_config())
if __name__ == '__main__':
tf.test.main()
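
For quick reference, here is a minimal standalone sketch of the construction the test above exercises. Argument values mirror the (256, 1.0, 1, 0.2, 48) parameterized case; treat it as illustrative, not canonical:

```python
import tensorflow as tf

from official.vision.modeling.backbones import spinenet_mobile

# Mirrors the (256, 1.0, 1, 0.2, 48) parameterized case above.
backbone = spinenet_mobile.SpineNetMobile(
    input_specs=tf.keras.layers.InputSpec(shape=[None, 256, 256, 3]),
    min_level=3,
    max_level=7,
    endpoints_num_filters=48,
    se_ratio=0.2,
    block_repeats=1,
    filter_size_scale=1.0)

endpoints = backbone(tf.zeros([1, 256, 256, 3]))
for level in range(3, 8):
  # Level l is downsampled by 2**l: level 3 -> 32x32 for a 256x256 input.
  print(level, endpoints[str(level)].shape)
```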
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for SpineNet."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.modeling.backbones import spinenet
class SpineNetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(128, 0.65, 1, 0.5, 128, 4, 6),
(256, 1.0, 1, 0.5, 256, 3, 6),
(384, 1.0, 2, 0.5, 256, 4, 7),
(512, 1.0, 3, 1.0, 256, 3, 7),
(640, 1.3, 4, 1.0, 384, 3, 7),
)
def test_network_creation(self, input_size, filter_size_scale, block_repeats,
resample_alpha, endpoints_num_filters, min_level,
max_level):
"""Test creation of SpineNet models."""
tf.keras.backend.set_image_data_format('channels_last')
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size, input_size, 3])
model = spinenet.SpineNet(
input_specs=input_specs,
min_level=min_level,
max_level=max_level,
endpoints_num_filters=endpoints_num_filters,
resample_alpha=resample_alpha,
block_repeats=block_repeats,
filter_size_scale=filter_size_scale,
init_stochastic_depth_rate=0.2,
)
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = model(inputs)
for l in range(min_level, max_level + 1):
self.assertIn(str(l), endpoints.keys())
self.assertAllEqual(
[1, input_size / 2**l, input_size / 2**l, endpoints_num_filters],
endpoints[str(l)].shape.as_list())
@parameterized.parameters(
((128, 128), (128, 128)),
((128, 128), (256, 256)),
((640, 640), (896, 1664)),
)
def test_load_from_different_input_specs(self, input_size_1, input_size_2):
"""Test loading checkpoints with different input size."""
def build_spinenet(input_size):
tf.keras.backend.set_image_data_format('channels_last')
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], 3])
model = spinenet.SpineNet(
input_specs=input_specs,
min_level=3,
max_level=7,
endpoints_num_filters=384,
resample_alpha=1.0,
block_repeats=2,
filter_size_scale=0.5)
return model
model_1 = build_spinenet(input_size_1)
model_2 = build_spinenet(input_size_2)
ckpt_1 = tf.train.Checkpoint(backbone=model_1)
ckpt_2 = tf.train.Checkpoint(backbone=model_2)
ckpt_path = self.get_temp_dir() + '/ckpt'
ckpt_1.write(ckpt_path)
ckpt_2.restore(ckpt_path).expect_partial()
def test_serialize_deserialize(self):
# Create a network object that sets all of its config options.
kwargs = dict(
min_level=3,
max_level=7,
endpoints_num_filters=256,
resample_alpha=0.5,
block_repeats=1,
filter_size_scale=1.0,
init_stochastic_depth_rate=0.2,
use_sync_bn=False,
activation='relu',
norm_momentum=0.99,
norm_epsilon=0.001,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
)
network = spinenet.SpineNet(**kwargs)
expected_config = dict(kwargs)
self.assertEqual(network.get_config(), expected_config)
# Create another network object from the first object's config.
new_network = spinenet.SpineNet.from_config(network.get_config())
# Validate that the config can be forced to JSON.
_ = new_network.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(network.get_config(), new_network.get_config())
if __name__ == '__main__':
tf.test.main()
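
The checkpoint-compatibility test above is worth pulling out as a pattern: because SpineNet's variables do not depend on the spatial input size, weights written at one resolution can seed a model built at another. A hedged sketch (the checkpoint path is hypothetical):

```python
import tensorflow as tf

from official.vision.modeling.backbones import spinenet


def build_spinenet(height, width):
  # Same configuration as test_load_from_different_input_specs above.
  return spinenet.SpineNet(
      input_specs=tf.keras.layers.InputSpec(shape=[None, height, width, 3]),
      min_level=3,
      max_level=7,
      endpoints_num_filters=384,
      resample_alpha=1.0,
      block_repeats=2,
      filter_size_scale=0.5)


ckpt_path = '/tmp/spinenet/ckpt'  # Hypothetical path.
tf.train.Checkpoint(backbone=build_spinenet(128, 128)).write(ckpt_path)
status = tf.train.Checkpoint(
    backbone=build_spinenet(256, 256)).restore(ckpt_path)
status.expect_partial()  # Variables outside the backbone may be absent.
```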
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build classification models."""
from typing import Any, Mapping, Optional
# Import libraries
import tensorflow as tf
layers = tf.keras.layers
@tf.keras.utils.register_keras_serializable(package='Vision')
class ClassificationModel(tf.keras.Model):
"""A classification class builder."""
def __init__(
self,
backbone: tf.keras.Model,
num_classes: int,
input_specs: tf.keras.layers.InputSpec = layers.InputSpec(
shape=[None, None, None, 3]),
dropout_rate: float = 0.0,
kernel_initializer: str = 'random_uniform',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
add_head_batch_norm: bool = False,
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
skip_logits_layer: bool = False,
**kwargs):
"""Classification initialization function.
Args:
backbone: a backbone network.
num_classes: `int` number of classes in classification task.
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
dropout_rate: `float` rate for dropout regularization.
kernel_initializer: kernel initializer for the dense layer.
kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to
None.
bias_regularizer: tf.keras.regularizers.Regularizer object. Default to
None.
add_head_batch_norm: `bool` whether to add a batch normalization layer
before pool.
use_sync_bn: `bool` if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
skip_logits_layer: `bool`, whether to skip the prediction layer.
**kwargs: keyword arguments to be passed.
"""
if use_sync_bn:
norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
norm = tf.keras.layers.BatchNormalization
axis = -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1
inputs = tf.keras.Input(shape=input_specs.shape[1:], name=input_specs.name)
endpoints = backbone(inputs)
x = endpoints[max(endpoints.keys())]
if add_head_batch_norm:
x = norm(axis=axis, momentum=norm_momentum, epsilon=norm_epsilon)(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
if not skip_logits_layer:
x = tf.keras.layers.Dropout(dropout_rate)(x)
x = tf.keras.layers.Dense(
num_classes,
kernel_initializer=kernel_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer)(
x)
super(ClassificationModel, self).__init__(
inputs=inputs, outputs=x, **kwargs)
self._config_dict = {
'backbone': backbone,
'num_classes': num_classes,
'input_specs': input_specs,
'dropout_rate': dropout_rate,
'kernel_initializer': kernel_initializer,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
'add_head_batch_norm': add_head_batch_norm,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
}
self._input_specs = input_specs
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._backbone = backbone
self._norm = norm
@property
def checkpoint_items(self) -> Mapping[str, tf.keras.Model]:
"""Returns a dictionary of items to be additionally checkpointed."""
return dict(backbone=self.backbone)
@property
def backbone(self) -> tf.keras.Model:
return self._backbone
def get_config(self) -> Mapping[str, Any]:
return self._config_dict
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
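
A short usage sketch for `ClassificationModel`, following the constructor above (the ResNet-50 backbone and parameter values are illustrative choices, matching the tests that follow):

```python
import numpy as np
import tensorflow as tf

from official.vision.modeling import backbones
from official.vision.modeling import classification_model

backbone = backbones.ResNet(model_id=50)
model = classification_model.ClassificationModel(
    backbone=backbone,
    num_classes=1000,
    dropout_rate=0.2)

logits = model(np.random.rand(2, 224, 224, 3).astype(np.float32))
print(logits.shape)  # (2, 1000)

# checkpoint_items exposes the backbone so it can be saved and restored
# independently of the classification head.
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
```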
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for classification network."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.vision.modeling import backbones
from official.vision.modeling import classification_model
class ClassificationNetworkTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(128, 50, 'relu'),
(128, 50, 'relu'),
(128, 50, 'swish'),
)
def test_resnet_network_creation(
self, input_size, resnet_model_id, activation):
"""Test for creation of a ResNet-50 classifier."""
inputs = np.random.rand(2, input_size, input_size, 3)
tf.keras.backend.set_image_data_format('channels_last')
backbone = backbones.ResNet(
model_id=resnet_model_id, activation=activation)
self.assertEqual(backbone.count_params(), 23561152)
num_classes = 1000
model = classification_model.ClassificationModel(
backbone=backbone,
num_classes=num_classes,
dropout_rate=0.2,
)
self.assertEqual(model.count_params(), 25610152)
logits = model(inputs)
self.assertAllEqual([2, num_classes], logits.numpy().shape)
def test_revnet_network_creation(self):
"""Test for creation of a RevNet-56 classifier."""
revnet_model_id = 56
inputs = np.random.rand(2, 224, 224, 3)
tf.keras.backend.set_image_data_format('channels_last')
backbone = backbones.RevNet(model_id=revnet_model_id)
self.assertEqual(backbone.count_params(), 19473792)
num_classes = 1000
model = classification_model.ClassificationModel(
backbone=backbone,
num_classes=num_classes,
dropout_rate=0.2,
add_head_batch_norm=True,
)
self.assertEqual(model.count_params(), 22816104)
logits = model(inputs)
self.assertAllEqual([2, num_classes], logits.numpy().shape)
@combinations.generate(
combinations.combine(
mobilenet_model_id=[
'MobileNetV1',
'MobileNetV2',
'MobileNetV3Large',
'MobileNetV3Small',
'MobileNetV3EdgeTPU',
'MobileNetMultiAVG',
'MobileNetMultiMAX',
],
filter_size_scale=[1.0, 0.75],
))
def test_mobilenet_network_creation(self, mobilenet_model_id,
filter_size_scale):
"""Test for creation of a MobileNet classifier."""
inputs = np.random.rand(2, 224, 224, 3)
tf.keras.backend.set_image_data_format('channels_last')
backbone = backbones.MobileNet(
model_id=mobilenet_model_id, filter_size_scale=filter_size_scale)
num_classes = 1001
model = classification_model.ClassificationModel(
backbone=backbone,
num_classes=num_classes,
dropout_rate=0.2,
)
logits = model(inputs)
self.assertAllEqual([2, num_classes], logits.numpy().shape)
@combinations.generate(
combinations.combine(
strategy=[
strategy_combinations.cloud_tpu_strategy,
strategy_combinations.one_device_strategy_gpu,
],
use_sync_bn=[False, True],
))
def test_sync_bn_multiple_devices(self, strategy, use_sync_bn):
"""Test for sync bn on TPU and GPU devices."""
inputs = np.random.rand(64, 128, 128, 3)
tf.keras.backend.set_image_data_format('channels_last')
with strategy.scope():
backbone = backbones.ResNet(model_id=50, use_sync_bn=use_sync_bn)
model = classification_model.ClassificationModel(
backbone=backbone,
num_classes=1000,
dropout_rate=0.2,
)
_ = model(inputs)
@combinations.generate(
combinations.combine(
strategy=[
strategy_combinations.one_device_strategy_gpu,
],
data_format=['channels_last', 'channels_first'],
input_dim=[1, 3, 4]))
def test_data_format_gpu(self, strategy, data_format, input_dim):
"""Test for different data formats on GPU devices."""
if data_format == 'channels_last':
inputs = np.random.rand(2, 128, 128, input_dim)
else:
inputs = np.random.rand(2, input_dim, 128, 128)
input_specs = tf.keras.layers.InputSpec(shape=inputs.shape)
tf.keras.backend.set_image_data_format(data_format)
with strategy.scope():
backbone = backbones.ResNet(model_id=50, input_specs=input_specs)
model = classification_model.ClassificationModel(
backbone=backbone,
num_classes=1000,
input_specs=input_specs,
)
_ = model(inputs)
def test_serialize_deserialize(self):
"""Validate the classification net can be serialized and deserialized."""
tf.keras.backend.set_image_data_format('channels_last')
backbone = backbones.ResNet(model_id=50)
model = classification_model.ClassificationModel(
backbone=backbone, num_classes=1000)
config = model.get_config()
new_model = classification_model.ClassificationModel.from_config(config)
# Validate that the config can be forced to JSON.
_ = new_model.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(model.get_config(), new_model.get_config())
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Decoders package definition."""
from official.vision.modeling.decoders.aspp import ASPP
from official.vision.modeling.decoders.fpn import FPN
from official.vision.modeling.decoders.nasfpn import NASFPN
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of Atrous Spatial Pyramid Pooling (ASPP) decoder."""
from typing import Any, List, Mapping, Optional, Union
# Import libraries
import tensorflow as tf
from official.modeling import hyperparams
from official.vision.modeling.decoders import factory
from official.vision.modeling.layers import deeplab
from official.vision.modeling.layers import nn_layers
TensorMapUnion = Union[tf.Tensor, Mapping[str, tf.Tensor]]
@tf.keras.utils.register_keras_serializable(package='Vision')
class ASPP(tf.keras.layers.Layer):
"""Creates an Atrous Spatial Pyramid Pooling (ASPP) layer."""
def __init__(
self,
level: int,
dilation_rates: List[int],
num_filters: int = 256,
pool_kernel_size: Optional[int] = None,
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
activation: str = 'relu',
dropout_rate: float = 0.0,
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
interpolation: str = 'bilinear',
use_depthwise_convolution: bool = False,
spp_layer_version: str = 'v1',
output_tensor: bool = False,
**kwargs):
"""Initializes an Atrous Spatial Pyramid Pooling (ASPP) layer.
Args:
level: An `int` level to apply ASPP.
dilation_rates: A `list` of dilation rates.
num_filters: An `int` number of output filters in ASPP.
pool_kernel_size: A `list` of [height, width] specifying the pooling kernel
size, or None. The pooling size is given with respect to the original image
size and is scaled down by 2**level. If None, global average pooling is used.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
activation: A `str` activation to be used in ASPP.
dropout_rate: A `float` rate for dropout regularization.
kernel_initializer: A `str` name of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
interpolation: A `str` of interpolation method. It should be one of
`bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
`gaussian`, or `mitchellcubic`.
use_depthwise_convolution: If True, depthwise separable convolutions are
used in the atrous spatial pyramid pooling.
spp_layer_version: A `str` of spatial pyramid pooling layer version.
output_tensor: Whether to output a single tensor or a dictionary of tensors.
Defaults to False.
**kwargs: Additional keyword arguments to be passed.
"""
super().__init__(**kwargs)
self._config_dict = {
'level': level,
'dilation_rates': dilation_rates,
'num_filters': num_filters,
'pool_kernel_size': pool_kernel_size,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'activation': activation,
'dropout_rate': dropout_rate,
'kernel_initializer': kernel_initializer,
'kernel_regularizer': kernel_regularizer,
'interpolation': interpolation,
'use_depthwise_convolution': use_depthwise_convolution,
'spp_layer_version': spp_layer_version,
'output_tensor': output_tensor
}
self._aspp_layer = deeplab.SpatialPyramidPooling if self._config_dict[
'spp_layer_version'] == 'v1' else nn_layers.SpatialPyramidPooling
def build(self, input_shape):
pool_kernel_size = None
if self._config_dict['pool_kernel_size']:
pool_kernel_size = [
int(p_size // 2**self._config_dict['level'])
for p_size in self._config_dict['pool_kernel_size']
]
self.aspp = self._aspp_layer(
output_channels=self._config_dict['num_filters'],
dilation_rates=self._config_dict['dilation_rates'],
pool_kernel_size=pool_kernel_size,
use_sync_bn=self._config_dict['use_sync_bn'],
batchnorm_momentum=self._config_dict['norm_momentum'],
batchnorm_epsilon=self._config_dict['norm_epsilon'],
activation=self._config_dict['activation'],
dropout=self._config_dict['dropout_rate'],
kernel_initializer=self._config_dict['kernel_initializer'],
kernel_regularizer=self._config_dict['kernel_regularizer'],
interpolation=self._config_dict['interpolation'],
use_depthwise_convolution=self._config_dict['use_depthwise_convolution']
)
def call(self, inputs: TensorMapUnion) -> TensorMapUnion:
"""Calls the Atrous Spatial Pyramid Pooling (ASPP) layer on an input.
If `output_tensor` is False, the output of ASPP will be a dict of
{`level`: `tf.Tensor`} even if only one level is present, so it remains
compatible with the rest of the segmentation model interfaces. If
`output_tensor` is True, a single tensor is output.
Args:
inputs: A `tf.Tensor` of shape [batch, height_l, width_l, filter_size] or
a `dict` of `tf.Tensor` where
- key: A `str` of the level of the multilevel feature maps.
- values: A `tf.Tensor` of shape [batch, height_l, width_l,
filter_size].
Returns:
A `tf.Tensor` of shape [batch, height_l, width_l, filter_size] or a `dict`
of `tf.Tensor` where
- key: A `str` of the level of the multilevel feature maps.
- values: A `tf.Tensor` of output of ASPP module.
"""
outputs = {}
level = str(self._config_dict['level'])
backbone_output = inputs[level] if isinstance(inputs, dict) else inputs
outputs = self.aspp(backbone_output)
return outputs if self._config_dict['output_tensor'] else {level: outputs}
def get_config(self) -> Mapping[str, Any]:
base_config = super().get_config()
return dict(list(base_config.items()) + list(self._config_dict.items()))
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
@factory.register_decoder_builder('aspp')
def build_aspp_decoder(
input_specs: Mapping[str, tf.TensorShape],
model_config: hyperparams.Config,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
"""Builds ASPP decoder from a config.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone. Note this is for consistent
interface, and is not used by ASPP decoder.
model_config: A OneOfConfig. Model config.
l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
None.
Returns:
A `tf.keras.Model` instance of the ASPP decoder.
Raises:
ValueError: If the model_config.decoder.type is not `aspp`.
"""
del input_specs # input_specs is not used by ASPP decoder.
decoder_type = model_config.decoder.type
decoder_cfg = model_config.decoder.get()
if decoder_type != 'aspp':
raise ValueError(f'Inconsistent decoder type {decoder_type}. '
'Need to be `aspp`.')
norm_activation_config = model_config.norm_activation
return ASPP(
level=decoder_cfg.level,
dilation_rates=decoder_cfg.dilation_rates,
num_filters=decoder_cfg.num_filters,
use_depthwise_convolution=decoder_cfg.use_depthwise_convolution,
pool_kernel_size=decoder_cfg.pool_kernel_size,
dropout_rate=decoder_cfg.dropout_rate,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
activation=norm_activation_config.activation,
kernel_regularizer=l2_regularizer,
spp_layer_version=decoder_cfg.spp_layer_version,
output_tensor=decoder_cfg.output_tensor)
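
To make the dict-in/dict-out contract described in `ASPP.call` concrete, a minimal sketch of calling the layer directly (the feature shapes and channel counts are illustrative):

```python
import tensorflow as tf

from official.vision.modeling.decoders import aspp

decoder = aspp.ASPP(level=3, dilation_rates=[6, 12, 18], num_filters=256)

# With the default output_tensor=False, ASPP accepts either a single tensor
# or a {level: tensor} dict and always returns a {level: tensor} dict.
features = {
    '3': tf.zeros([1, 32, 32, 512]),
    '4': tf.zeros([1, 16, 16, 1024]),
}
outputs = decoder(features)
print(outputs['3'].shape)  # (1, 32, 32, 256); spatial size is preserved.
```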
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for aspp."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import aspp
class ASPPTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(3, [6, 12, 18, 24], 128, 'v1'),
(3, [6, 12, 18], 128, 'v1'),
(3, [6, 12], 256, 'v1'),
(4, [6, 12, 18, 24], 128, 'v2'),
(4, [6, 12, 18], 128, 'v2'),
(4, [6, 12], 256, 'v2'),
)
def test_network_creation(self, level, dilation_rates, num_filters,
spp_layer_version):
"""Test creation of ASPP."""
input_size = 256
tf.keras.backend.set_image_data_format('channels_last')
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
backbone = resnet.ResNet(model_id=50)
network = aspp.ASPP(
level=level,
dilation_rates=dilation_rates,
num_filters=num_filters,
spp_layer_version=spp_layer_version)
endpoints = backbone(inputs)
feats = network(endpoints)
self.assertIn(str(level), feats)
self.assertAllEqual(
[1, input_size // 2**level, input_size // 2**level, num_filters],
feats[str(level)].shape.as_list())
def test_serialize_deserialize(self):
# Create a network object that sets all of its config options.
kwargs = dict(
level=3,
dilation_rates=[6, 12],
num_filters=256,
pool_kernel_size=None,
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
activation='relu',
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
interpolation='bilinear',
dropout_rate=0.2,
use_depthwise_convolution=False,
spp_layer_version='v1',
output_tensor=False,
dtype='float32',
name='aspp',
trainable=True)
network = aspp.ASPP(**kwargs)
expected_config = dict(kwargs)
self.assertEqual(network.get_config(), expected_config)
# Create another network object from the first object's config.
new_network = aspp.ASPP.from_config(network.get_config())
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(network.get_config(), new_network.get_config())
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Decoder registers and factory method.
One can register a new decoder model by the following two steps:
1. Import the factory and register the builder in the decoder file.
2. Import the decoder class and add a build in __init__.py.
```
# my_decoder.py
from modeling.decoders import factory
class MyDecoder():
...
@factory.register_decoder_builder('my_decoder')
def build_my_decoder():
return MyDecoder()
# decoders/__init__.py adds import
from modeling.decoders.my_decoder import MyDecoder
```
If you want the MyDecoder class to be used only by specific binaries, do not
import the decoder module in decoders/__init__.py; instead, import it in the
places that use it.
"""
from typing import Any, Callable, Mapping, Optional, Union
# Import libraries
import tensorflow as tf
from official.core import registry
from official.modeling import hyperparams
_REGISTERED_DECODER_CLS = {}
def register_decoder_builder(key: str) -> Callable[..., Any]:
"""Decorates a builder of decoder class.
The builder should be a Callable (a class or a function).
This decorator supports registration of decoder builder as follows:
```
class MyDecoder(tf.keras.Model):
pass
@register_decoder_builder('mydecoder')
def builder(input_specs, config, l2_reg):
return MyDecoder(...)
# Builds a MyDecoder object.
my_decoder = build_decoder(input_specs, config, l2_reg)
```
Args:
key: A `str` of key to look up the builder.
Returns:
A callable that can be used as a decorator, registering the decorated
builder under `key` for later lookup by `build_decoder`.
"""
return registry.register(_REGISTERED_DECODER_CLS, key)
@register_decoder_builder('identity')
def build_identity(
input_specs: Optional[Mapping[str, tf.TensorShape]] = None,
model_config: Optional[hyperparams.Config] = None,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None) -> None:
"""Builds identity decoder from a config.
None of the input arguments are used by the identity decoder; they are kept
here so the interface stays consistent.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
model_config: A `OneOfConfig` of model config.
l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
None.
Returns:
None. The identity decoder is a no-op; features from the backbone are passed
through unchanged as the model output.
"""
del input_specs, model_config, l2_regularizer # Unused by identity decoder.
def build_decoder(
input_specs: Mapping[str, tf.TensorShape],
model_config: hyperparams.Config,
l2_regularizer: tf.keras.regularizers.Regularizer = None,
**kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]: # pytype: disable=annotation-type-mismatch # typed-keras
"""Builds decoder from a config.
A decoder can be a keras.Model, a keras.layers.Layer, or None. If it is not
None, the decoder will take features from the backbone as input and generate
decoded feature maps. If it is None, such as an identity decoder, the decoder
is skipped and features from the backbone are regarded as model output.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
model_config: A `OneOfConfig` of model config.
l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
None.
**kwargs: Additional keyword args to be passed to decoder builder.
Returns:
An instance of the decoder.
"""
decoder_builder = registry.lookup(_REGISTERED_DECODER_CLS,
model_config.decoder.type)
return decoder_builder(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer,
**kwargs)
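
The registration flow described in the module docstring, written out as a runnable sketch (`MyDecoder` and `'my_decoder'` are hypothetical names used only for illustration):

```python
import tensorflow as tf

from official.vision.modeling.decoders import factory


class MyDecoder(tf.keras.layers.Layer):
  """A hypothetical pass-through decoder."""

  def call(self, inputs):
    return inputs


@factory.register_decoder_builder('my_decoder')
def build_my_decoder(input_specs=None, model_config=None, l2_regularizer=None,
                     **kwargs):
  del input_specs, model_config, l2_regularizer, kwargs  # Unused here.
  return MyDecoder()

# After registration, build_decoder dispatches on model_config.decoder.type:
# a config whose decoder type is 'my_decoder' resolves to build_my_decoder.
```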
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for decoder factory functions."""
from absl.testing import parameterized
import tensorflow as tf
from tensorflow.python.distribute import combinations
from official.vision import configs
from official.vision.configs import decoders as decoders_cfg
from official.vision.modeling import decoders
from official.vision.modeling.decoders import factory
class FactoryTest(tf.test.TestCase, parameterized.TestCase):
@combinations.generate(
combinations.combine(
num_filters=[128, 256], use_separable_conv=[True, False]))
def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
"""Test creation of FPN decoder."""
min_level = 3
max_level = 7
input_specs = {}
for level in range(min_level, max_level):
input_specs[str(level)] = tf.TensorShape(
[1, 128 // (2**level), 128 // (2**level), 3])
network = decoders.FPN(
input_specs=input_specs,
num_filters=num_filters,
use_separable_conv=use_separable_conv,
use_sync_bn=True)
model_config = configs.retinanet.RetinaNet()
model_config.min_level = min_level
model_config.max_level = max_level
model_config.num_classes = 10
model_config.input_size = [None, None, 3]
model_config.decoder = decoders_cfg.Decoder(
type='fpn',
fpn=decoders_cfg.FPN(
num_filters=num_filters, use_separable_conv=use_separable_conv))
factory_network = factory.build_decoder(
input_specs=input_specs, model_config=model_config)
network_config = network.get_config()
factory_network_config = factory_network.get_config()
self.assertEqual(network_config, factory_network_config)
@combinations.generate(
combinations.combine(
num_filters=[128, 256],
num_repeats=[3, 5],
use_separable_conv=[True, False]))
def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
use_separable_conv):
"""Test creation of NASFPN decoder."""
min_level = 3
max_level = 7
input_specs = {}
for level in range(min_level, max_level):
input_specs[str(level)] = tf.TensorShape(
[1, 128 // (2**level), 128 // (2**level), 3])
network = decoders.NASFPN(
input_specs=input_specs,
num_filters=num_filters,
num_repeats=num_repeats,
use_separable_conv=use_separable_conv,
use_sync_bn=True)
model_config = configs.retinanet.RetinaNet()
model_config.min_level = min_level
model_config.max_level = max_level
model_config.num_classes = 10
model_config.input_size = [None, None, 3]
model_config.decoder = decoders_cfg.Decoder(
type='nasfpn',
nasfpn=decoders_cfg.NASFPN(
num_filters=num_filters,
num_repeats=num_repeats,
use_separable_conv=use_separable_conv))
factory_network = factory.build_decoder(
input_specs=input_specs, model_config=model_config)
network_config = network.get_config()
factory_network_config = factory_network.get_config()
self.assertEqual(network_config, factory_network_config)
@combinations.generate(
combinations.combine(
level=[3, 4],
dilation_rates=[[6, 12, 18], [6, 12]],
num_filters=[128, 256]))
def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
"""Test creation of ASPP decoder."""
input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}
network = decoders.ASPP(
level=level,
dilation_rates=dilation_rates,
num_filters=num_filters,
use_sync_bn=True)
model_config = configs.semantic_segmentation.SemanticSegmentationModel()
model_config.num_classes = 10
model_config.input_size = [None, None, 3]
model_config.decoder = decoders_cfg.Decoder(
type='aspp',
aspp=decoders_cfg.ASPP(
level=level, dilation_rates=dilation_rates,
num_filters=num_filters))
factory_network = factory.build_decoder(
input_specs=input_specs, model_config=model_config)
network_config = network.get_config()
factory_network_config = factory_network.get_config()
# Due to calling `super().get_config()` in the aspp layer, everything but
# the names of the two layer instances is the same, so we force the names to
# be equal to avoid a false alarm.
factory_network_config['name'] = network_config['name']
self.assertEqual(network_config, factory_network_config)
def test_identity_decoder_creation(self):
"""Test creation of identity decoder."""
model_config = configs.retinanet.RetinaNet()
model_config.num_classes = 2
model_config.input_size = [None, None, 3]
model_config.decoder = decoders_cfg.Decoder(
type='identity', identity=decoders_cfg.Identity())
factory_network = factory.build_decoder(
input_specs=None, model_config=model_config)
self.assertIsNone(factory_network)
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of Feature Pyramid Networks (FPN)."""
from typing import Any, Mapping, Optional
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.decoders import factory
from official.vision.ops import spatial_transform_ops
@tf.keras.utils.register_keras_serializable(package='Vision')
class FPN(tf.keras.Model):
"""Creates a Feature Pyramid Network (FPN).
This implements the paper:
Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan, and
Serge Belongie.
Feature Pyramid Networks for Object Detection.
(https://arxiv.org/pdf/1612.03144)
"""
def __init__(
self,
input_specs: Mapping[str, tf.TensorShape],
min_level: int = 3,
max_level: int = 7,
num_filters: int = 256,
fusion_type: str = 'sum',
use_separable_conv: bool = False,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initializes a Feature Pyramid Network (FPN).
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
min_level: An `int` of minimum level in FPN output feature maps.
max_level: An `int` of maximum level in FPN output feature maps.
num_filters: An `int` number of filters in FPN layers.
fusion_type: A `str`, either `sum` or `concat`, specifying whether feature
fusion is performed by summation or concatenation.
use_separable_conv: A `bool`. If True use separable convolution for
convolution in FPN layers.
activation: A `str` name of the activation function.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A `str` name of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
self._config_dict = {
'input_specs': input_specs,
'min_level': min_level,
'max_level': max_level,
'num_filters': num_filters,
'fusion_type': fusion_type,
'use_separable_conv': use_separable_conv,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_initializer': kernel_initializer,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
}
if use_separable_conv:
conv2d = tf.keras.layers.SeparableConv2D
else:
conv2d = tf.keras.layers.Conv2D
if use_sync_bn:
norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
norm = tf.keras.layers.BatchNormalization
activation_fn = tf.keras.layers.Activation(
tf_utils.get_activation(activation))
# Determine the batch norm axis from the image data format.
if tf.keras.backend.image_data_format() == 'channels_last':
bn_axis = -1
else:
bn_axis = 1
# Get input feature pyramid from backbone.
logging.info('FPN input_specs: %s', input_specs)
inputs = self._build_input_pyramid(input_specs, min_level)
backbone_max_level = min(int(max(inputs.keys())), max_level)
# Build lateral connections.
feats_lateral = {}
for level in range(min_level, backbone_max_level + 1):
feats_lateral[str(level)] = conv2d(
filters=num_filters,
kernel_size=1,
padding='same',
kernel_initializer=kernel_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer)(
inputs[str(level)])
# Build top-down path.
feats = {str(backbone_max_level): feats_lateral[str(backbone_max_level)]}
for level in range(backbone_max_level - 1, min_level - 1, -1):
feat_a = spatial_transform_ops.nearest_upsampling(
feats[str(level + 1)], 2)
feat_b = feats_lateral[str(level)]
if fusion_type == 'sum':
feats[str(level)] = feat_a + feat_b
elif fusion_type == 'concat':
feats[str(level)] = tf.concat([feat_a, feat_b], axis=-1)
else:
raise ValueError('Fusion type {} not supported.'.format(fusion_type))
# TODO(xianzhi): consider removing the bias in conv2d.
# Build post-hoc 3x3 convolution kernel.
for level in range(min_level, backbone_max_level + 1):
feats[str(level)] = conv2d(
filters=num_filters,
strides=1,
kernel_size=3,
padding='same',
kernel_initializer=kernel_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer)(
feats[str(level)])
# TODO(xianzhi): consider removing the bias in conv2d.
# Build coarser FPN levels introduced for RetinaNet.
for level in range(backbone_max_level + 1, max_level + 1):
feats_in = feats[str(level - 1)]
if level > backbone_max_level + 1:
feats_in = activation_fn(feats_in)
feats[str(level)] = conv2d(
filters=num_filters,
strides=2,
kernel_size=3,
padding='same',
kernel_initializer=kernel_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer)(
feats_in)
# Apply batch norm layers.
for level in range(min_level, max_level + 1):
feats[str(level)] = norm(
axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
feats[str(level)])
self._output_specs = {
str(level): feats[str(level)].get_shape()
for level in range(min_level, max_level + 1)
}
super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs)
def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
min_level: int):
assert isinstance(input_specs, dict)
if min(input_specs.keys()) > str(min_level):
raise ValueError(
'Backbone min level should be less than or equal to FPN min level.')
inputs = {}
for level, spec in input_specs.items():
inputs[level] = tf.keras.Input(shape=spec[1:])
return inputs
def get_config(self) -> Mapping[str, Any]:
return self._config_dict
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
@property
def output_specs(self) -> Mapping[str, tf.TensorShape]:
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
@factory.register_decoder_builder('fpn')
def build_fpn_decoder(
input_specs: Mapping[str, tf.TensorShape],
model_config: hyperparams.Config,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
"""Builds FPN decoder from a config.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
model_config: A OneOfConfig. Model config.
l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
None.
Returns:
A `tf.keras.Model` instance of the FPN decoder.
Raises:
ValueError: If the model_config.decoder.type is not `fpn`.
"""
decoder_type = model_config.decoder.type
decoder_cfg = model_config.decoder.get()
if decoder_type != 'fpn':
raise ValueError(f'Inconsistent decoder type {decoder_type}. '
'Need to be `fpn`.')
norm_activation_config = model_config.norm_activation
return FPN(
input_specs=input_specs,
min_level=model_config.min_level,
max_level=model_config.max_level,
num_filters=decoder_cfg.num_filters,
fusion_type=decoder_cfg.fusion_type,
use_separable_conv=decoder_cfg.use_separable_conv,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
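
A usage sketch combining FPN with a backbone, mirroring what the tests below exercise (the 256x256 input size is arbitrary):

```python
import tensorflow as tf

from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn

backbone = resnet.ResNet(model_id=50)
decoder = fpn.FPN(
    input_specs=backbone.output_specs,  # {level: TensorShape} from backbone.
    min_level=3,
    max_level=7)

endpoints = backbone(tf.zeros([1, 256, 256, 3]))
feats = decoder(endpoints)
for level in range(3, 8):
  # Levels beyond the backbone's deepest output come from the extra
  # stride-2 convolutions added for RetinaNet.
  print(level, feats[str(level)].shape)  # Level l: 256 / 2**l on each side.
```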
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.modeling.backbones import mobilenet
from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import fpn
class FPNTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(256, 3, 7, False, 'sum'),
(256, 3, 7, True, 'concat'),
)
def test_network_creation(self, input_size, min_level, max_level,
use_separable_conv, fusion_type):
"""Test creation of FPN."""
tf.keras.backend.set_image_data_format('channels_last')
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
backbone = resnet.ResNet(model_id=50)
network = fpn.FPN(
input_specs=backbone.output_specs,
min_level=min_level,
max_level=max_level,
fusion_type=fusion_type,
use_separable_conv=use_separable_conv)
endpoints = backbone(inputs)
feats = network(endpoints)
for level in range(min_level, max_level + 1):
self.assertIn(str(level), feats)
self.assertAllEqual(
[1, input_size // 2**level, input_size // 2**level, 256],
feats[str(level)].shape.as_list())
@parameterized.parameters(
(256, 3, 7, False),
(256, 3, 7, True),
)
def test_network_creation_with_mobilenet(self, input_size, min_level,
max_level, use_separable_conv):
"""Test creation of FPN with mobilenet backbone."""
tf.keras.backend.set_image_data_format('channels_last')
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
backbone = mobilenet.MobileNet(model_id='MobileNetV2')
network = fpn.FPN(
input_specs=backbone.output_specs,
min_level=min_level,
max_level=max_level,
use_separable_conv=use_separable_conv)
endpoints = backbone(inputs)
feats = network(endpoints)
for level in range(min_level, max_level + 1):
self.assertIn(str(level), feats)
self.assertAllEqual(
[1, input_size // 2**level, input_size // 2**level, 256],
feats[str(level)].shape.as_list())
def test_serialize_deserialize(self):
# Create a network object that sets all of its config options.
kwargs = dict(
input_specs=resnet.ResNet(model_id=50).output_specs,
min_level=3,
max_level=7,
num_filters=256,
fusion_type='sum',
use_separable_conv=False,
use_sync_bn=False,
activation='relu',
norm_momentum=0.99,
norm_epsilon=0.001,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
)
network = fpn.FPN(**kwargs)
expected_config = dict(kwargs)
self.assertEqual(network.get_config(), expected_config)
# Create another network object from the first object's config.
new_network = fpn.FPN.from_config(network.get_config())
# Validate that the config can be forced to JSON.
_ = new_network.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(network.get_config(), new_network.get_config())
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of NAS-FPN."""
from typing import Any, List, Mapping, Optional, Tuple
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.modeling.decoders import factory
from official.vision.ops import spatial_transform_ops
# The fixed NAS-FPN architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, combine_fn, (input_offset0, input_offset1), is_output).
NASFPN_BLOCK_SPECS = [
(4, 'attention', (1, 3), False),
(4, 'sum', (1, 5), False),
(3, 'sum', (0, 6), True),
(4, 'sum', (6, 7), True),
(5, 'attention', (7, 8), True),
(7, 'attention', (6, 9), True),
(6, 'attention', (9, 10), True),
]
class BlockSpec():
"""A container class that specifies the block configuration for NAS-FPN."""
def __init__(self, level: int, combine_fn: str,
input_offsets: Tuple[int, int], is_output: bool):
self.level = level
self.combine_fn = combine_fn
self.input_offsets = input_offsets
self.is_output = is_output
def build_block_specs(
block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
"""Builds the list of BlockSpec objects for NAS-FPN."""
if not block_specs:
block_specs = NASFPN_BLOCK_SPECS
logging.info('Building NAS-FPN block specs: %s', block_specs)
return [BlockSpec(*b) for b in block_specs]
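
To illustrate the tuple format documented above NASFPN_BLOCK_SPECS, a small sketch of how `build_block_specs` expands the default specs (the printed fields follow `BlockSpec.__init__`):

```python
from official.vision.modeling.decoders import nasfpn

# Each tuple is (block_level, combine_fn, (input_offset0, input_offset1),
# is_output); build_block_specs turns each one into a BlockSpec instance.
specs = nasfpn.build_block_specs()
first = specs[0]
print(first.level, first.combine_fn, first.input_offsets, first.is_output)
# -> 4 attention (1, 3) False
```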
@tf.keras.utils.register_keras_serializable(package='Vision')
class NASFPN(tf.keras.Model):
"""Creates a NAS-FPN model.
This implements the paper:
Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le.
NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection.
(https://arxiv.org/abs/1904.07392)
"""
def __init__(
self,
input_specs: Mapping[str, tf.TensorShape],
min_level: int = 3,
max_level: int = 7,
block_specs: List[BlockSpec] = build_block_specs(),
num_filters: int = 256,
num_repeats: int = 5,
use_separable_conv: bool = False,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_initializer: str = 'VarianceScaling',
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initializes a NAS-FPN model.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
min_level: An `int` of minimum level in FPN output feature maps.
max_level: An `int` of maximum level in FPN output feature maps.
block_specs: a list of BlockSpec objects that specifies the NAS-FPN
network topology. By default, the previously discovered architecture is
used.
num_filters: An `int` number of filters in FPN layers.
num_repeats: number of repeats for feature pyramid network.
use_separable_conv: A `bool`. If True use separable convolution for
convolution in FPN layers.
activation: A `str` name of the activation function.
use_sync_bn: A `bool`. If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_initializer: A `str` name of kernel_initializer for convolutional
layers.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
self._config_dict = {
'input_specs': input_specs,
'min_level': min_level,
'max_level': max_level,
'num_filters': num_filters,
'num_repeats': num_repeats,
'use_separable_conv': use_separable_conv,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_initializer': kernel_initializer,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
}
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._num_repeats = num_repeats
self._conv_op = (tf.keras.layers.SeparableConv2D
if self._config_dict['use_separable_conv']
else tf.keras.layers.Conv2D)
if self._config_dict['use_separable_conv']:
self._conv_kwargs = {
'depthwise_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'pointwise_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'depthwise_regularizer': self._config_dict['kernel_regularizer'],
'pointwise_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
}
else:
self._conv_kwargs = {
'kernel_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
}
self._norm_op = (tf.keras.layers.experimental.SyncBatchNormalization
if self._config_dict['use_sync_bn']
else tf.keras.layers.BatchNormalization)
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._norm_kwargs = {
'axis': self._bn_axis,
'momentum': self._config_dict['norm_momentum'],
'epsilon': self._config_dict['norm_epsilon'],
}
self._activation = tf_utils.get_activation(activation)
# Gets input feature pyramid from backbone.
inputs = self._build_input_pyramid(input_specs, min_level)
# Projects the input features.
feats = []
for level in range(self._min_level, self._max_level + 1):
if str(level) in inputs.keys():
feats.append(self._resample_feature_map(
inputs[str(level)], level, level, self._config_dict['num_filters']))
else:
feats.append(self._resample_feature_map(
feats[-1], level - 1, level, self._config_dict['num_filters']))
# Repeatedly builds the NAS-FPN modules.
for _ in range(self._num_repeats):
output_feats = self._build_feature_pyramid(feats)
feats = [output_feats[level]
for level in range(self._min_level, self._max_level + 1)]
self._output_specs = {
str(level): output_feats[level].get_shape()
for level in range(min_level, max_level + 1)
}
output_feats = {str(level): output_feats[level]
for level in output_feats.keys()}
super(NASFPN, self).__init__(inputs=inputs, outputs=output_feats, **kwargs)
def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
min_level: int):
assert isinstance(input_specs, dict)
if min(input_specs.keys()) > str(min_level):
raise ValueError(
'Backbone min level should be less than or equal to FPN min level.')
inputs = {}
for level, spec in input_specs.items():
inputs[level] = tf.keras.Input(shape=spec[1:])
return inputs
def _resample_feature_map(self,
inputs,
input_level,
target_level,
target_num_filters=256):
x = inputs
_, _, _, input_num_filters = x.get_shape().as_list()
if input_num_filters != target_num_filters:
x = self._conv_op(
filters=target_num_filters,
kernel_size=1,
padding='same',
**self._conv_kwargs)(x)
x = self._norm_op(**self._norm_kwargs)(x)
if input_level < target_level:
stride = int(2 ** (target_level - input_level))
return tf.keras.layers.MaxPool2D(
pool_size=stride, strides=stride, padding='same')(x)
if input_level > target_level:
scale = int(2 ** (input_level - target_level))
return spatial_transform_ops.nearest_upsampling(x, scale=scale)
# Force output x to be the same dtype as mixed precision policy. This avoids
# dtype mismatch when one input (by default float32 dtype) does not meet all
# the above conditions and is output unchanged, while other inputs are
# processed to have different dtype, e.g., using bfloat16 on TPU.
compute_dtype = tf.keras.layers.Layer().dtype_policy.compute_dtype
if (compute_dtype is not None) and (x.dtype != compute_dtype):
return tf.cast(x, dtype=compute_dtype)
else:
return x
def _global_attention(self, feat0, feat1):
m = tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True)
m = tf.math.sigmoid(m)
return feat0 + feat1 * m
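  # A minimal numeric sketch (not part of the model) of `_global_attention`:
  #   m = sigmoid(max of feat0 over the spatial dims), shape [batch, 1, 1, C]
  #   output = feat0 + feat1 * m
  # i.e., feat1 is gated channel-wise by feat0's global response before the
  # two maps are summed.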
def _build_feature_pyramid(self, feats):
num_output_connections = [0] * len(feats)
num_output_levels = self._max_level - self._min_level + 1
feat_levels = list(range(self._min_level, self._max_level + 1))
for i, block_spec in enumerate(self._block_specs):
new_level = block_spec.level
# Checks the range of input_offsets.
for input_offset in block_spec.input_offsets:
if input_offset >= len(feats):
raise ValueError(
            'input_offset ({}) is larger than the number of feats ({}).'
            .format(input_offset, len(feats)))
input0 = block_spec.input_offsets[0]
input1 = block_spec.input_offsets[1]
# Update graph with inputs.
node0 = feats[input0]
node0_level = feat_levels[input0]
num_output_connections[input0] += 1
node0 = self._resample_feature_map(node0, node0_level, new_level)
node1 = feats[input1]
node1_level = feat_levels[input1]
num_output_connections[input1] += 1
node1 = self._resample_feature_map(node1, node1_level, new_level)
# Combine node0 and node1 to create new feat.
if block_spec.combine_fn == 'sum':
new_node = node0 + node1
elif block_spec.combine_fn == 'attention':
if node0_level >= node1_level:
new_node = self._global_attention(node0, node1)
else:
new_node = self._global_attention(node1, node0)
else:
raise ValueError('unknown combine_fn `{}`.'
.format(block_spec.combine_fn))
      # For output nodes, also fuse in same-level intermediate nodes that
      # have no output connections.
if block_spec.is_output:
for j, (feat, feat_level, num_output) in enumerate(
zip(feats, feat_levels, num_output_connections)):
if num_output == 0 and feat_level == new_level:
num_output_connections[j] += 1
feat_ = self._resample_feature_map(feat, feat_level, new_level)
new_node += feat_
new_node = self._activation(new_node)
new_node = self._conv_op(
filters=self._config_dict['num_filters'],
kernel_size=(3, 3),
padding='same',
**self._conv_kwargs)(new_node)
new_node = self._norm_op(**self._norm_kwargs)(new_node)
feats.append(new_node)
feat_levels.append(new_level)
num_output_connections.append(0)
output_feats = {}
for i in range(len(feats) - num_output_levels, len(feats)):
level = feat_levels[i]
output_feats[level] = feats[i]
logging.info('Output feature pyramid: %s', output_feats)
return output_feats
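  # A hedged walk-through (illustrative only) of one merge step above: a
  # block_spec with level=4, input_offsets=(0, 2), combine_fn='sum' and
  # is_output=False resamples feats[0] and feats[2] to level 4, sums them,
  # applies activation -> 3x3 conv -> norm, and appends the result to `feats`
  # so that later block specs can reference it by offset.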
def get_config(self) -> Mapping[str, Any]:
return self._config_dict
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
@property
def output_specs(self) -> Mapping[str, tf.TensorShape]:
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
@factory.register_decoder_builder('nasfpn')
def build_nasfpn_decoder(
input_specs: Mapping[str, tf.TensorShape],
model_config: hyperparams.Config,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
"""Builds NASFPN decoder from a config.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
model_config: A OneOfConfig. Model config.
l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
None.
Returns:
A `tf.keras.Model` instance of the NASFPN decoder.
Raises:
ValueError: If the model_config.decoder.type is not `nasfpn`.
"""
decoder_type = model_config.decoder.type
decoder_cfg = model_config.decoder.get()
if decoder_type != 'nasfpn':
raise ValueError(f'Inconsistent decoder type {decoder_type}. '
'Need to be `nasfpn`.')
norm_activation_config = model_config.norm_activation
return NASFPN(
input_specs=input_specs,
min_level=model_config.min_level,
max_level=model_config.max_level,
num_filters=decoder_cfg.num_filters,
num_repeats=decoder_cfg.num_repeats,
use_separable_conv=decoder_cfg.use_separable_conv,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
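# A minimal usage sketch (illustrative only; the shapes below are assumptions,
# not part of the library): build a NASFPN directly over dummy backbone output
# specs for levels 3..5 and run it on matching features.
#   input_specs = {
#       '3': tf.TensorShape([None, 80, 80, 128]),
#       '4': tf.TensorShape([None, 40, 40, 256]),
#       '5': tf.TensorShape([None, 20, 20, 512]),
#   }
#   fpn = NASFPN(input_specs=input_specs, min_level=3, max_level=7,
#                num_filters=256)
#   feats = fpn({'3': tf.ones([1, 80, 80, 128]),
#                '4': tf.ones([1, 40, 40, 256]),
#                '5': tf.ones([1, 20, 20, 512])})
#   # feats['3'] .. feats['7'] each have num_filters (256) channels.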
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for NAS-FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.modeling.backbones import resnet
from official.vision.modeling.decoders import nasfpn
class NASFPNTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(256, 3, 7, False),
(256, 3, 7, True),
)
def test_network_creation(self, input_size, min_level, max_level,
use_separable_conv):
"""Test creation of NAS-FPN."""
tf.keras.backend.set_image_data_format('channels_last')
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
num_filters = 256
backbone = resnet.ResNet(model_id=50)
network = nasfpn.NASFPN(
input_specs=backbone.output_specs,
min_level=min_level,
max_level=max_level,
num_filters=num_filters,
use_separable_conv=use_separable_conv)
endpoints = backbone(inputs)
feats = network(endpoints)
for level in range(min_level, max_level + 1):
self.assertIn(str(level), feats)
self.assertAllEqual(
[1, input_size // 2**level, input_size // 2**level, num_filters],
feats[str(level)].shape.as_list())
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
from typing import Optional
import tensorflow as tf
from official.vision.configs import image_classification as classification_cfg
from official.vision.configs import maskrcnn as maskrcnn_cfg
from official.vision.configs import retinanet as retinanet_cfg
from official.vision.configs import semantic_segmentation as segmentation_cfg
from official.vision.modeling import backbones
from official.vision.modeling import classification_model
from official.vision.modeling import decoders
from official.vision.modeling import maskrcnn_model
from official.vision.modeling import retinanet_model
from official.vision.modeling import segmentation_model
from official.vision.modeling.heads import dense_prediction_heads
from official.vision.modeling.heads import instance_heads
from official.vision.modeling.heads import segmentation_heads
from official.vision.modeling.layers import detection_generator
from official.vision.modeling.layers import mask_sampler
from official.vision.modeling.layers import roi_aligner
from official.vision.modeling.layers import roi_generator
from official.vision.modeling.layers import roi_sampler
def build_classification_model(
input_specs: tf.keras.layers.InputSpec,
model_config: classification_cfg.ImageClassificationModel,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
skip_logits_layer: bool = False,
backbone: Optional[tf.keras.Model] = None) -> tf.keras.Model:
"""Builds the classification model."""
norm_activation_config = model_config.norm_activation
if not backbone:
backbone = backbones.factory.build_backbone(
input_specs=input_specs,
backbone_config=model_config.backbone,
norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer)
model = classification_model.ClassificationModel(
backbone=backbone,
num_classes=model_config.num_classes,
input_specs=input_specs,
dropout_rate=model_config.dropout_rate,
kernel_initializer=model_config.kernel_initializer,
kernel_regularizer=l2_regularizer,
add_head_batch_norm=model_config.add_head_batch_norm,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
skip_logits_layer=skip_logits_layer)
return model
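# A minimal usage sketch, mirroring the unit tests for this builder (here
# `backbones_cfg` is assumed to be `official.vision.configs.backbones`):
#   input_specs = tf.keras.layers.InputSpec(shape=[None, 224, 224, 3])
#   model_config = classification_cfg.ImageClassificationModel(
#       num_classes=2, backbone=backbones_cfg.Backbone(type='resnet'))
#   model = build_classification_model(input_specs, model_config)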
def build_maskrcnn(input_specs: tf.keras.layers.InputSpec,
model_config: maskrcnn_cfg.MaskRCNN,
l2_regularizer: Optional[
tf.keras.regularizers.Regularizer] = None,
backbone: Optional[tf.keras.Model] = None,
decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
"""Builds Mask R-CNN model."""
norm_activation_config = model_config.norm_activation
if not backbone:
backbone = backbones.factory.build_backbone(
input_specs=input_specs,
backbone_config=model_config.backbone,
norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer)
backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))
if not decoder:
decoder = decoders.factory.build_decoder(
input_specs=backbone.output_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
rpn_head_config = model_config.rpn_head
roi_generator_config = model_config.roi_generator
roi_sampler_config = model_config.roi_sampler
roi_aligner_config = model_config.roi_aligner
detection_head_config = model_config.detection_head
generator_config = model_config.detection_generator
num_anchors_per_location = (
len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)
rpn_head = dense_prediction_heads.RPNHead(
min_level=model_config.min_level,
max_level=model_config.max_level,
num_anchors_per_location=num_anchors_per_location,
num_convs=rpn_head_config.num_convs,
num_filters=rpn_head_config.num_filters,
use_separable_conv=rpn_head_config.use_separable_conv,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
detection_head = instance_heads.DetectionHead(
num_classes=model_config.num_classes,
num_convs=detection_head_config.num_convs,
num_filters=detection_head_config.num_filters,
use_separable_conv=detection_head_config.use_separable_conv,
num_fcs=detection_head_config.num_fcs,
fc_dims=detection_head_config.fc_dims,
class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer,
name='detection_head')
if decoder:
decoder_features = decoder(backbone_features)
rpn_head(decoder_features)
if roi_sampler_config.cascade_iou_thresholds:
detection_head_cascade = [detection_head]
for cascade_num in range(len(roi_sampler_config.cascade_iou_thresholds)):
detection_head = instance_heads.DetectionHead(
num_classes=model_config.num_classes,
num_convs=detection_head_config.num_convs,
num_filters=detection_head_config.num_filters,
use_separable_conv=detection_head_config.use_separable_conv,
num_fcs=detection_head_config.num_fcs,
fc_dims=detection_head_config.fc_dims,
class_agnostic_bbox_pred=detection_head_config
.class_agnostic_bbox_pred,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer,
name='detection_head_{}'.format(cascade_num + 1))
detection_head_cascade.append(detection_head)
detection_head = detection_head_cascade
roi_generator_obj = roi_generator.MultilevelROIGenerator(
pre_nms_top_k=roi_generator_config.pre_nms_top_k,
pre_nms_score_threshold=roi_generator_config.pre_nms_score_threshold,
pre_nms_min_size_threshold=(
roi_generator_config.pre_nms_min_size_threshold),
nms_iou_threshold=roi_generator_config.nms_iou_threshold,
num_proposals=roi_generator_config.num_proposals,
test_pre_nms_top_k=roi_generator_config.test_pre_nms_top_k,
test_pre_nms_score_threshold=(
roi_generator_config.test_pre_nms_score_threshold),
test_pre_nms_min_size_threshold=(
roi_generator_config.test_pre_nms_min_size_threshold),
test_nms_iou_threshold=roi_generator_config.test_nms_iou_threshold,
test_num_proposals=roi_generator_config.test_num_proposals,
use_batched_nms=roi_generator_config.use_batched_nms)
roi_sampler_cascade = []
roi_sampler_obj = roi_sampler.ROISampler(
mix_gt_boxes=roi_sampler_config.mix_gt_boxes,
num_sampled_rois=roi_sampler_config.num_sampled_rois,
foreground_fraction=roi_sampler_config.foreground_fraction,
foreground_iou_threshold=roi_sampler_config.foreground_iou_threshold,
background_iou_high_threshold=(
roi_sampler_config.background_iou_high_threshold),
background_iou_low_threshold=(
roi_sampler_config.background_iou_low_threshold))
roi_sampler_cascade.append(roi_sampler_obj)
  # Initializes additional ROI samplers for cascade heads.
if roi_sampler_config.cascade_iou_thresholds:
for iou in roi_sampler_config.cascade_iou_thresholds:
roi_sampler_obj = roi_sampler.ROISampler(
mix_gt_boxes=False,
num_sampled_rois=roi_sampler_config.num_sampled_rois,
foreground_iou_threshold=iou,
background_iou_high_threshold=iou,
background_iou_low_threshold=0.0,
skip_subsampling=True)
roi_sampler_cascade.append(roi_sampler_obj)
roi_aligner_obj = roi_aligner.MultilevelROIAligner(
crop_size=roi_aligner_config.crop_size,
sample_offset=roi_aligner_config.sample_offset)
detection_generator_obj = detection_generator.DetectionGenerator(
apply_nms=generator_config.apply_nms,
pre_nms_top_k=generator_config.pre_nms_top_k,
pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
nms_iou_threshold=generator_config.nms_iou_threshold,
max_num_detections=generator_config.max_num_detections,
nms_version=generator_config.nms_version,
use_cpu_nms=generator_config.use_cpu_nms,
soft_nms_sigma=generator_config.soft_nms_sigma)
if model_config.include_mask:
mask_head = instance_heads.MaskHead(
num_classes=model_config.num_classes,
upsample_factor=model_config.mask_head.upsample_factor,
num_convs=model_config.mask_head.num_convs,
num_filters=model_config.mask_head.num_filters,
use_separable_conv=model_config.mask_head.use_separable_conv,
activation=model_config.norm_activation.activation,
norm_momentum=model_config.norm_activation.norm_momentum,
norm_epsilon=model_config.norm_activation.norm_epsilon,
kernel_regularizer=l2_regularizer,
class_agnostic=model_config.mask_head.class_agnostic)
mask_sampler_obj = mask_sampler.MaskSampler(
mask_target_size=(
model_config.mask_roi_aligner.crop_size *
model_config.mask_head.upsample_factor),
num_sampled_masks=model_config.mask_sampler.num_sampled_masks)
mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(
crop_size=model_config.mask_roi_aligner.crop_size,
sample_offset=model_config.mask_roi_aligner.sample_offset)
else:
mask_head = None
mask_sampler_obj = None
mask_roi_aligner_obj = None
model = maskrcnn_model.MaskRCNNModel(
backbone=backbone,
decoder=decoder,
rpn_head=rpn_head,
detection_head=detection_head,
roi_generator=roi_generator_obj,
roi_sampler=roi_sampler_cascade,
roi_aligner=roi_aligner_obj,
detection_generator=detection_generator_obj,
mask_head=mask_head,
mask_sampler=mask_sampler_obj,
mask_roi_aligner=mask_roi_aligner_obj,
class_agnostic_bbox_pred=detection_head_config.class_agnostic_bbox_pred,
cascade_class_ensemble=detection_head_config.cascade_class_ensemble,
min_level=model_config.min_level,
max_level=model_config.max_level,
num_scales=model_config.anchor.num_scales,
aspect_ratios=model_config.anchor.aspect_ratios,
anchor_size=model_config.anchor.anchor_size)
return model
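# A minimal usage sketch, mirroring the unit tests for this builder (here
# `backbones_cfg` is assumed to be `official.vision.configs.backbones`):
#   input_specs = tf.keras.layers.InputSpec(shape=[None, 640, 640, 3])
#   model_config = maskrcnn_cfg.MaskRCNN(
#       num_classes=2, backbone=backbones_cfg.Backbone(type='resnet'))
#   model = build_maskrcnn(input_specs, model_config,
#                          l2_regularizer=tf.keras.regularizers.l2(5e-5))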
def build_retinanet(
input_specs: tf.keras.layers.InputSpec,
model_config: retinanet_cfg.RetinaNet,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
backbone: Optional[tf.keras.Model] = None,
    decoder: Optional[tf.keras.Model] = None
) -> tf.keras.Model:
"""Builds RetinaNet model."""
norm_activation_config = model_config.norm_activation
if not backbone:
backbone = backbones.factory.build_backbone(
input_specs=input_specs,
backbone_config=model_config.backbone,
norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer)
backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))
if not decoder:
decoder = decoders.factory.build_decoder(
input_specs=backbone.output_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
head_config = model_config.head
generator_config = model_config.detection_generator
num_anchors_per_location = (
len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)
head = dense_prediction_heads.RetinaNetHead(
min_level=model_config.min_level,
max_level=model_config.max_level,
num_classes=model_config.num_classes,
num_anchors_per_location=num_anchors_per_location,
num_convs=head_config.num_convs,
num_filters=head_config.num_filters,
attribute_heads=[
cfg.as_dict() for cfg in (head_config.attribute_heads or [])
],
use_separable_conv=head_config.use_separable_conv,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
  # Builds the decoder and head so that their trainable weights are
  # initialized.
if decoder:
decoder_features = decoder(backbone_features)
_ = head(decoder_features)
detection_generator_obj = detection_generator.MultilevelDetectionGenerator(
apply_nms=generator_config.apply_nms,
pre_nms_top_k=generator_config.pre_nms_top_k,
pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
nms_iou_threshold=generator_config.nms_iou_threshold,
max_num_detections=generator_config.max_num_detections,
nms_version=generator_config.nms_version,
use_cpu_nms=generator_config.use_cpu_nms,
soft_nms_sigma=generator_config.soft_nms_sigma)
model = retinanet_model.RetinaNetModel(
backbone,
decoder,
head,
detection_generator_obj,
min_level=model_config.min_level,
max_level=model_config.max_level,
num_scales=model_config.anchor.num_scales,
aspect_ratios=model_config.anchor.aspect_ratios,
anchor_size=model_config.anchor.anchor_size)
return model
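# A minimal usage sketch, mirroring the unit tests for this builder (here
# `backbones_cfg` is assumed to be `official.vision.configs.backbones`):
#   input_specs = tf.keras.layers.InputSpec(shape=[None, 640, 640, 3])
#   model_config = retinanet_cfg.RetinaNet(
#       num_classes=2, backbone=backbones_cfg.Backbone(type='resnet'))
#   model = build_retinanet(input_specs, model_config)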
def build_segmentation_model(
input_specs: tf.keras.layers.InputSpec,
model_config: segmentation_cfg.SemanticSegmentationModel,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    backbone: Optional[tf.keras.Model] = None,
    decoder: Optional[tf.keras.Model] = None
) -> tf.keras.Model:
"""Builds Segmentation model."""
norm_activation_config = model_config.norm_activation
if not backbone:
backbone = backbones.factory.build_backbone(
input_specs=input_specs,
backbone_config=model_config.backbone,
norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer)
if not decoder:
decoder = decoders.factory.build_decoder(
input_specs=backbone.output_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
head_config = model_config.head
head = segmentation_heads.SegmentationHead(
num_classes=model_config.num_classes,
level=head_config.level,
num_convs=head_config.num_convs,
prediction_kernel_size=head_config.prediction_kernel_size,
num_filters=head_config.num_filters,
use_depthwise_convolution=head_config.use_depthwise_convolution,
upsample_factor=head_config.upsample_factor,
feature_fusion=head_config.feature_fusion,
low_level=head_config.low_level,
low_level_num_filters=head_config.low_level_num_filters,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
mask_scoring_head = None
if model_config.mask_scoring_head:
mask_scoring_head = segmentation_heads.MaskScoring(
num_classes=model_config.num_classes,
**model_config.mask_scoring_head.as_dict(),
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
model = segmentation_model.SegmentationModel(
backbone, decoder, head, mask_scoring_head=mask_scoring_head)
return model
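# A hedged usage sketch (assumes the default SemanticSegmentationModel config
# is buildable as-is; unlike the builders above, this one is not mirrored from
# a unit test in this change):
#   input_specs = tf.keras.layers.InputSpec(shape=[None, 512, 512, 3])
#   model_config = segmentation_cfg.SemanticSegmentationModel(num_classes=21)
#   model = build_segmentation_model(input_specs, model_config)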
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
# Import libraries
import tensorflow as tf
from official.core import registry
from official.vision.configs import video_classification as video_classification_cfg
from official.vision.modeling import video_classification_model
from official.vision.modeling import backbones
_REGISTERED_MODEL_CLS = {}
def register_model_builder(key: str):
"""Decorates a builder of model class.
The builder should be a Callable (a class or a function).
  This decorator supports registration of model builders as follows:
  ```
  class MyModel(tf.keras.Model):
    pass
  @register_model_builder('mymodel')
  def builder(input_specs, config, num_classes, l2_reg):
    return MyModel(...)
  # Builds a MyModel object.
  my_model = build_model('mymodel', input_specs, config, num_classes, l2_reg)
```
Args:
key: the key to look up the builder.
Returns:
    A callable for use as a decorator that registers the decorated builder
    under `key` for later lookup by `build_model`.
"""
return registry.register(_REGISTERED_MODEL_CLS, key)
def build_model(
model_type: str,
input_specs: tf.keras.layers.InputSpec,
model_config: video_classification_cfg.hyperparams.Config,
num_classes: int,
l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
"""Builds backbone from a config.
Args:
model_type: string name of model type. It should be consistent with
ModelConfig.model_type.
input_specs: tf.keras.layers.InputSpec.
model_config: a OneOfConfig. Model config.
num_classes: number of classes.
l2_regularizer: tf.keras.regularizers.Regularizer instance. Default to None.
Returns:
    A tf.keras.Model instance of the built model.
"""
model_builder = registry.lookup(_REGISTERED_MODEL_CLS, model_type)
return model_builder(input_specs, model_config, num_classes, l2_regularizer)
@register_model_builder('video_classification')
def build_video_classification_model(
input_specs: tf.keras.layers.InputSpec,
model_config: video_classification_cfg.VideoClassificationModel,
num_classes: int,
l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
"""Builds the video classification model."""
input_specs_dict = {'image': input_specs}
norm_activation_config = model_config.norm_activation
backbone = backbones.factory.build_backbone(
input_specs=input_specs,
backbone_config=model_config.backbone,
norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer)
model = video_classification_model.VideoClassificationModel(
backbone=backbone,
num_classes=num_classes,
input_specs=input_specs_dict,
dropout_rate=model_config.dropout_rate,
aggregate_endpoints=model_config.aggregate_endpoints,
kernel_regularizer=l2_regularizer,
require_endpoints=model_config.require_endpoints)
return model
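# A minimal usage sketch, mirroring the unit tests for this builder (here
# `backbones_3d_cfg` is assumed to be `official.vision.configs.backbones_3d`):
#   input_specs = tf.keras.layers.InputSpec(shape=[None, 8, 224, 224, 3])
#   model_config = video_classification_cfg.VideoClassificationModel(
#       backbone=backbones_3d_cfg.Backbone3D(type='resnet_3d'))
#   model = build_model('video_classification', input_specs, model_config,
#                       num_classes=2)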
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for factory.py."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.configs import backbones
from official.vision.configs import backbones_3d
from official.vision.configs import image_classification as classification_cfg
from official.vision.configs import maskrcnn as maskrcnn_cfg
from official.vision.configs import retinanet as retinanet_cfg
from official.vision.configs import video_classification as video_classification_cfg
from official.vision.modeling import factory
from official.vision.modeling import factory_3d
class ClassificationModelBuilderTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
('resnet', (224, 224), 5e-5),
('resnet', (224, 224), None),
('resnet', (None, None), 5e-5),
('resnet', (None, None), None),
)
def test_builder(self, backbone_type, input_size, weight_decay):
num_classes = 2
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], 3])
model_config = classification_cfg.ImageClassificationModel(
num_classes=num_classes,
backbone=backbones.Backbone(type=backbone_type))
l2_regularizer = (
tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
_ = factory.build_classification_model(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
class MaskRCNNBuilderTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
('resnet', (640, 640)),
('resnet', (None, None)),
)
def test_builder(self, backbone_type, input_size):
num_classes = 2
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], 3])
model_config = maskrcnn_cfg.MaskRCNN(
num_classes=num_classes,
backbone=backbones.Backbone(type=backbone_type))
l2_regularizer = tf.keras.regularizers.l2(5e-5)
_ = factory.build_maskrcnn(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
class RetinaNetBuilderTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
('resnet', (640, 640), False),
('resnet', (None, None), True),
)
def test_builder(self, backbone_type, input_size, has_att_heads):
num_classes = 2
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], 3])
if has_att_heads:
attribute_heads_config = [
retinanet_cfg.AttributeHead(name='att1'),
retinanet_cfg.AttributeHead(
name='att2', type='classification', size=2),
]
else:
attribute_heads_config = None
model_config = retinanet_cfg.RetinaNet(
num_classes=num_classes,
backbone=backbones.Backbone(type=backbone_type),
head=retinanet_cfg.RetinaNetHead(
attribute_heads=attribute_heads_config))
l2_regularizer = tf.keras.regularizers.l2(5e-5)
_ = factory.build_retinanet(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
if has_att_heads:
self.assertEqual(model_config.head.attribute_heads[0].as_dict(),
dict(name='att1', type='regression', size=1))
self.assertEqual(model_config.head.attribute_heads[1].as_dict(),
dict(name='att2', type='classification', size=2))
class VideoClassificationModelBuilderTest(parameterized.TestCase,
tf.test.TestCase):
@parameterized.parameters(
('resnet_3d', (8, 224, 224), 5e-5),
('resnet_3d', (None, None, None), 5e-5),
)
def test_builder(self, backbone_type, input_size, weight_decay):
input_specs = tf.keras.layers.InputSpec(
shape=[None, input_size[0], input_size[1], input_size[2], 3])
model_config = video_classification_cfg.VideoClassificationModel(
backbone=backbones_3d.Backbone3D(type=backbone_type))
l2_regularizer = (
tf.keras.regularizers.l2(weight_decay) if weight_decay else None)
_ = factory_3d.build_video_classification_model(
input_specs=input_specs,
model_config=model_config,
num_classes=2,
l2_regularizer=l2_regularizer)
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Heads package definition."""
from official.vision.modeling.heads.dense_prediction_heads import RetinaNetHead
from official.vision.modeling.heads.dense_prediction_heads import RPNHead
from official.vision.modeling.heads.instance_heads import DetectionHead
from official.vision.modeling.heads.instance_heads import MaskHead
from official.vision.modeling.heads.segmentation_heads import SegmentationHead
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of dense prediction heads."""
from typing import Any, Dict, List, Mapping, Optional, Union
# Import libraries
import numpy as np
import tensorflow as tf
from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class RetinaNetHead(tf.keras.layers.Layer):
"""Creates a RetinaNet head."""
def __init__(
self,
min_level: int,
max_level: int,
num_classes: int,
num_anchors_per_location: int,
num_convs: int = 4,
num_filters: int = 256,
attribute_heads: Optional[List[Dict[str, Any]]] = None,
use_separable_conv: bool = False,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
num_params_per_anchor: int = 4,
**kwargs):
"""Initializes a RetinaNet head.
Args:
min_level: An `int` number of minimum feature level.
max_level: An `int` number of maximum feature level.
num_classes: An `int` number of classes to predict.
      num_anchors_per_location: An `int` number of anchors per pixel
        location.
      num_convs: An `int` that represents the number of intermediate conv
        layers before the prediction.
      num_filters: An `int` that represents the number of filters of the
        intermediate conv layers.
attribute_heads: If not None, a list that contains a dict for each
additional attribute head. Each dict consists of 3 key-value pairs:
`name`, `type` ('regression' or 'classification'), and `size` (number
of predicted values for each instance).
      use_separable_conv: A `bool` that indicates whether separable
        convolution layers are used.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
num_params_per_anchor: Number of parameters required to specify an anchor
box. For example, `num_params_per_anchor` would be 4 for axis-aligned
anchor boxes specified by their y-centers, x-centers, heights, and
widths.
**kwargs: Additional keyword arguments to be passed.
"""
super(RetinaNetHead, self).__init__(**kwargs)
self._config_dict = {
'min_level': min_level,
'max_level': max_level,
'num_classes': num_classes,
'num_anchors_per_location': num_anchors_per_location,
'num_convs': num_convs,
'num_filters': num_filters,
'attribute_heads': attribute_heads,
'use_separable_conv': use_separable_conv,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
'num_params_per_anchor': num_params_per_anchor,
}
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation = tf_utils.get_activation(activation)
def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
"""Creates the variables of the head."""
conv_op = (tf.keras.layers.SeparableConv2D
if self._config_dict['use_separable_conv']
else tf.keras.layers.Conv2D)
conv_kwargs = {
'filters': self._config_dict['num_filters'],
'kernel_size': 3,
'padding': 'same',
'bias_initializer': tf.zeros_initializer(),
'bias_regularizer': self._config_dict['bias_regularizer'],
}
if not self._config_dict['use_separable_conv']:
conv_kwargs.update({
'kernel_initializer': tf.keras.initializers.RandomNormal(
stddev=0.01),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
})
bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
if self._config_dict['use_sync_bn']
else tf.keras.layers.BatchNormalization)
bn_kwargs = {
'axis': self._bn_axis,
'momentum': self._config_dict['norm_momentum'],
'epsilon': self._config_dict['norm_epsilon'],
}
# Class net.
self._cls_convs = []
self._cls_norms = []
for level in range(
self._config_dict['min_level'], self._config_dict['max_level'] + 1):
this_level_cls_norms = []
for i in range(self._config_dict['num_convs']):
if level == self._config_dict['min_level']:
cls_conv_name = 'classnet-conv_{}'.format(i)
self._cls_convs.append(conv_op(name=cls_conv_name, **conv_kwargs))
cls_norm_name = 'classnet-conv-norm_{}_{}'.format(level, i)
this_level_cls_norms.append(bn_op(name=cls_norm_name, **bn_kwargs))
self._cls_norms.append(this_level_cls_norms)
classifier_kwargs = {
'filters': (
self._config_dict['num_classes'] *
self._config_dict['num_anchors_per_location']),
'kernel_size': 3,
'padding': 'same',
'bias_initializer': tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
'bias_regularizer': self._config_dict['bias_regularizer'],
}
if not self._config_dict['use_separable_conv']:
classifier_kwargs.update({
'kernel_initializer': tf.keras.initializers.RandomNormal(stddev=1e-5),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
})
self._classifier = conv_op(name='scores', **classifier_kwargs)
# Box net.
self._box_convs = []
self._box_norms = []
for level in range(
self._config_dict['min_level'], self._config_dict['max_level'] + 1):
this_level_box_norms = []
for i in range(self._config_dict['num_convs']):
if level == self._config_dict['min_level']:
box_conv_name = 'boxnet-conv_{}'.format(i)
self._box_convs.append(conv_op(name=box_conv_name, **conv_kwargs))
box_norm_name = 'boxnet-conv-norm_{}_{}'.format(level, i)
this_level_box_norms.append(bn_op(name=box_norm_name, **bn_kwargs))
self._box_norms.append(this_level_box_norms)
box_regressor_kwargs = {
'filters': (self._config_dict['num_params_per_anchor'] *
self._config_dict['num_anchors_per_location']),
'kernel_size': 3,
'padding': 'same',
'bias_initializer': tf.zeros_initializer(),
'bias_regularizer': self._config_dict['bias_regularizer'],
}
if not self._config_dict['use_separable_conv']:
box_regressor_kwargs.update({
'kernel_initializer': tf.keras.initializers.RandomNormal(
stddev=1e-5),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
})
self._box_regressor = conv_op(name='boxes', **box_regressor_kwargs)
# Attribute learning nets.
if self._config_dict['attribute_heads']:
self._att_predictors = {}
self._att_convs = {}
self._att_norms = {}
for att_config in self._config_dict['attribute_heads']:
att_name = att_config['name']
att_type = att_config['type']
att_size = att_config['size']
att_convs_i = []
att_norms_i = []
# Build conv and norm layers.
for level in range(self._config_dict['min_level'],
self._config_dict['max_level'] + 1):
this_level_att_norms = []
for i in range(self._config_dict['num_convs']):
if level == self._config_dict['min_level']:
att_conv_name = '{}-conv_{}'.format(att_name, i)
att_convs_i.append(conv_op(name=att_conv_name, **conv_kwargs))
att_norm_name = '{}-conv-norm_{}_{}'.format(att_name, level, i)
this_level_att_norms.append(bn_op(name=att_norm_name, **bn_kwargs))
att_norms_i.append(this_level_att_norms)
self._att_convs[att_name] = att_convs_i
self._att_norms[att_name] = att_norms_i
# Build the final prediction layer.
att_predictor_kwargs = {
'filters':
(att_size * self._config_dict['num_anchors_per_location']),
'kernel_size': 3,
'padding': 'same',
'bias_initializer': tf.zeros_initializer(),
'bias_regularizer': self._config_dict['bias_regularizer'],
}
if att_type == 'regression':
att_predictor_kwargs.update(
{'bias_initializer': tf.zeros_initializer()})
elif att_type == 'classification':
att_predictor_kwargs.update({
'bias_initializer':
tf.constant_initializer(-np.log((1 - 0.01) / 0.01))
})
else:
raise ValueError(
'Attribute head type {} not supported.'.format(att_type))
if not self._config_dict['use_separable_conv']:
att_predictor_kwargs.update({
'kernel_initializer':
tf.keras.initializers.RandomNormal(stddev=1e-5),
'kernel_regularizer':
self._config_dict['kernel_regularizer'],
})
self._att_predictors[att_name] = conv_op(
name='{}_attributes'.format(att_name), **att_predictor_kwargs)
super(RetinaNetHead, self).build(input_shape)
def call(self, features: Mapping[str, tf.Tensor]):
"""Forward pass of the RetinaNet head.
Args:
features: A `dict` of `tf.Tensor` where
- key: A `str` of the level of the multilevel features.
- values: A `tf.Tensor`, the feature map tensors, whose shape is
[batch, height_l, width_l, channels].
Returns:
scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
- key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the classification scores predicted from
          a particular feature level, whose shape is
          [batch, height_l, width_l, num_classes * num_anchors_per_location].
boxes: A `dict` of `tf.Tensor` which includes coordinates of the
predictions.
- key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box coordinates predicted from a
          particular feature level, whose shape is
          [batch, height_l, width_l,
           num_params_per_anchor * num_anchors_per_location].
attributes: a dict of (attribute_name, attribute_prediction). Each
`attribute_prediction` is a dict of:
- key: `str`, the level of the multilevel predictions.
        - values: `Tensor`, the attribute predictions from a particular feature
level, whose shape is
[batch, height_l, width_l,
attribute_size * num_anchors_per_location].
Can be an empty dictionary if no attribute learning is required.
"""
scores = {}
boxes = {}
if self._config_dict['attribute_heads']:
attributes = {
att_config['name']: {}
for att_config in self._config_dict['attribute_heads']
}
else:
attributes = {}
for i, level in enumerate(
range(self._config_dict['min_level'],
self._config_dict['max_level'] + 1)):
this_level_features = features[str(level)]
# class net.
x = this_level_features
for conv, norm in zip(self._cls_convs, self._cls_norms[i]):
x = conv(x)
x = norm(x)
x = self._activation(x)
scores[str(level)] = self._classifier(x)
# box net.
x = this_level_features
for conv, norm in zip(self._box_convs, self._box_norms[i]):
x = conv(x)
x = norm(x)
x = self._activation(x)
boxes[str(level)] = self._box_regressor(x)
# attribute nets.
if self._config_dict['attribute_heads']:
for att_config in self._config_dict['attribute_heads']:
att_name = att_config['name']
x = this_level_features
for conv, norm in zip(self._att_convs[att_name],
self._att_norms[att_name][i]):
x = conv(x)
x = norm(x)
x = self._activation(x)
attributes[att_name][str(level)] = self._att_predictors[att_name](x)
return scores, boxes, attributes
def get_config(self):
return self._config_dict
@classmethod
def from_config(cls, config):
return cls(**config)
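# A hedged shape sketch, mirroring the unit tests for this head: with
# min_level=3, max_level=4, num_classes=3, num_anchors_per_location=3 and a
# [2, 128, 128, 16] level-3 feature map, `call` returns
#   scores['3'] with shape [2, 128, 128, 9]  # 3 classes * 3 anchors
#   boxes['3'] with shape [2, 128, 128, 12]  # 4 box params * 3 anchors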
@tf.keras.utils.register_keras_serializable(package='Vision')
class RPNHead(tf.keras.layers.Layer):
"""Creates a Region Proposal Network (RPN) head."""
def __init__(
self,
min_level: int,
max_level: int,
num_anchors_per_location: int,
num_convs: int = 1,
num_filters: int = 256,
use_separable_conv: bool = False,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initializes a Region Proposal Network head.
Args:
min_level: An `int` number of minimum feature level.
max_level: An `int` number of maximum feature level.
      num_anchors_per_location: An `int` number of anchors per pixel
        location.
      num_convs: An `int` that represents the number of intermediate
        convolution layers before the prediction.
      num_filters: An `int` that represents the number of filters of the
        intermediate convolution layers.
      use_separable_conv: A `bool` that indicates whether separable
        convolution layers are used.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
super(RPNHead, self).__init__(**kwargs)
self._config_dict = {
'min_level': min_level,
'max_level': max_level,
'num_anchors_per_location': num_anchors_per_location,
'num_convs': num_convs,
'num_filters': num_filters,
'use_separable_conv': use_separable_conv,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
}
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation = tf_utils.get_activation(activation)
def build(self, input_shape):
"""Creates the variables of the head."""
conv_op = (tf.keras.layers.SeparableConv2D
if self._config_dict['use_separable_conv']
else tf.keras.layers.Conv2D)
conv_kwargs = {
'filters': self._config_dict['num_filters'],
'kernel_size': 3,
'padding': 'same',
'bias_initializer': tf.zeros_initializer(),
'bias_regularizer': self._config_dict['bias_regularizer'],
}
if not self._config_dict['use_separable_conv']:
conv_kwargs.update({
'kernel_initializer': tf.keras.initializers.RandomNormal(
stddev=0.01),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
})
bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
if self._config_dict['use_sync_bn']
else tf.keras.layers.BatchNormalization)
bn_kwargs = {
'axis': self._bn_axis,
'momentum': self._config_dict['norm_momentum'],
'epsilon': self._config_dict['norm_epsilon'],
}
self._convs = []
self._norms = []
for level in range(
self._config_dict['min_level'], self._config_dict['max_level'] + 1):
this_level_norms = []
for i in range(self._config_dict['num_convs']):
if level == self._config_dict['min_level']:
conv_name = 'rpn-conv_{}'.format(i)
self._convs.append(conv_op(name=conv_name, **conv_kwargs))
norm_name = 'rpn-conv-norm_{}_{}'.format(level, i)
this_level_norms.append(bn_op(name=norm_name, **bn_kwargs))
self._norms.append(this_level_norms)
classifier_kwargs = {
'filters': self._config_dict['num_anchors_per_location'],
'kernel_size': 1,
'padding': 'valid',
'bias_initializer': tf.zeros_initializer(),
'bias_regularizer': self._config_dict['bias_regularizer'],
}
if not self._config_dict['use_separable_conv']:
classifier_kwargs.update({
'kernel_initializer': tf.keras.initializers.RandomNormal(
stddev=1e-5),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
})
self._classifier = conv_op(name='rpn-scores', **classifier_kwargs)
box_regressor_kwargs = {
'filters': 4 * self._config_dict['num_anchors_per_location'],
'kernel_size': 1,
'padding': 'valid',
'bias_initializer': tf.zeros_initializer(),
'bias_regularizer': self._config_dict['bias_regularizer'],
}
if not self._config_dict['use_separable_conv']:
box_regressor_kwargs.update({
'kernel_initializer': tf.keras.initializers.RandomNormal(
stddev=1e-5),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
})
self._box_regressor = conv_op(name='rpn-boxes', **box_regressor_kwargs)
super(RPNHead, self).build(input_shape)
def call(self, features: Mapping[str, tf.Tensor]):
"""Forward pass of the RPN head.
Args:
features: A `dict` of `tf.Tensor` where
- key: A `str` of the level of the multilevel features.
- values: A `tf.Tensor`, the feature map tensors, whose shape is [batch,
height_l, width_l, channels].
Returns:
scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
- key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the objectness scores predicted from a
          particular feature level, whose shape is
          [batch, height_l, width_l, num_anchors_per_location].
boxes: A `dict` of `tf.Tensor` which includes coordinates of the
predictions.
- key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box coordinates predicted from a
          particular feature level, whose shape is
          [batch, height_l, width_l, 4 * num_anchors_per_location].
"""
scores = {}
boxes = {}
for i, level in enumerate(
range(self._config_dict['min_level'],
self._config_dict['max_level'] + 1)):
x = features[str(level)]
for conv, norm in zip(self._convs, self._norms[i]):
x = conv(x)
x = norm(x)
x = self._activation(x)
scores[str(level)] = self._classifier(x)
boxes[str(level)] = self._box_regressor(x)
return scores, boxes
def get_config(self):
return self._config_dict
@classmethod
def from_config(cls, config):
return cls(**config)
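# A hedged shape sketch, mirroring the unit tests for this head: with
# num_anchors_per_location=3 and a [2, 128, 128, 16] level-3 feature map,
# `call` returns
#   scores['3'] with shape [2, 128, 128, 3]  # one objectness score per anchor
#   boxes['3'] with shape [2, 128, 128, 12]  # 4 box params per anchor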
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for dense_prediction_heads.py."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.modeling.heads import dense_prediction_heads
class RetinaNetHeadTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(False, False, False),
(False, True, False),
(True, False, True),
(True, True, True),
)
def test_forward(self, use_separable_conv, use_sync_bn, has_att_heads):
if has_att_heads:
attribute_heads = [dict(name='depth', type='regression', size=1)]
else:
attribute_heads = None
retinanet_head = dense_prediction_heads.RetinaNetHead(
min_level=3,
max_level=4,
num_classes=3,
num_anchors_per_location=3,
num_convs=2,
num_filters=256,
attribute_heads=attribute_heads,
use_separable_conv=use_separable_conv,
activation='relu',
use_sync_bn=use_sync_bn,
norm_momentum=0.99,
norm_epsilon=0.001,
kernel_regularizer=None,
bias_regularizer=None,
)
features = {
'3': np.random.rand(2, 128, 128, 16),
'4': np.random.rand(2, 64, 64, 16),
}
scores, boxes, attributes = retinanet_head(features)
self.assertAllEqual(scores['3'].numpy().shape, [2, 128, 128, 9])
self.assertAllEqual(scores['4'].numpy().shape, [2, 64, 64, 9])
self.assertAllEqual(boxes['3'].numpy().shape, [2, 128, 128, 12])
self.assertAllEqual(boxes['4'].numpy().shape, [2, 64, 64, 12])
if has_att_heads:
for att in attributes.values():
self.assertAllEqual(att['3'].numpy().shape, [2, 128, 128, 3])
self.assertAllEqual(att['4'].numpy().shape, [2, 64, 64, 3])
def test_serialize_deserialize(self):
retinanet_head = dense_prediction_heads.RetinaNetHead(
min_level=3,
max_level=7,
num_classes=3,
num_anchors_per_location=9,
num_convs=2,
num_filters=16,
attribute_heads=None,
use_separable_conv=False,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
kernel_regularizer=None,
bias_regularizer=None,
)
config = retinanet_head.get_config()
new_retinanet_head = (
dense_prediction_heads.RetinaNetHead.from_config(config))
self.assertAllEqual(
retinanet_head.get_config(), new_retinanet_head.get_config())
class RpnHeadTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(False, False),
(False, True),
(True, False),
(True, True),
)
def test_forward(self, use_separable_conv, use_sync_bn):
rpn_head = dense_prediction_heads.RPNHead(
min_level=3,
max_level=4,
num_anchors_per_location=3,
num_convs=2,
num_filters=256,
use_separable_conv=use_separable_conv,
activation='relu',
use_sync_bn=use_sync_bn,
norm_momentum=0.99,
norm_epsilon=0.001,
kernel_regularizer=None,
bias_regularizer=None,
)
features = {
'3': np.random.rand(2, 128, 128, 16),
'4': np.random.rand(2, 64, 64, 16),
}
scores, boxes = rpn_head(features)
self.assertAllEqual(scores['3'].numpy().shape, [2, 128, 128, 3])
self.assertAllEqual(scores['4'].numpy().shape, [2, 64, 64, 3])
self.assertAllEqual(boxes['3'].numpy().shape, [2, 128, 128, 12])
self.assertAllEqual(boxes['4'].numpy().shape, [2, 64, 64, 12])
def test_serialize_deserialize(self):
rpn_head = dense_prediction_heads.RPNHead(
min_level=3,
max_level=7,
num_anchors_per_location=9,
num_convs=2,
num_filters=16,
use_separable_conv=False,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
kernel_regularizer=None,
bias_regularizer=None,
)
config = rpn_head.get_config()
new_rpn_head = dense_prediction_heads.RPNHead.from_config(config)
self.assertAllEqual(rpn_head.get_config(), new_rpn_head.get_config())
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of instance prediction heads."""
from typing import List, Union, Optional
# Import libraries
import tensorflow as tf
from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class DetectionHead(tf.keras.layers.Layer):
"""Creates a detection head."""
def __init__(
self,
num_classes: int,
num_convs: int = 0,
num_filters: int = 256,
use_separable_conv: bool = False,
num_fcs: int = 2,
fc_dims: int = 1024,
class_agnostic_bbox_pred: bool = False,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
**kwargs):
"""Initializes a detection head.
Args:
num_classes: An `int` for the number of classes.
      num_convs: An `int` that represents the number of intermediate
        convolution layers before the FC layers.
      num_filters: An `int` that represents the number of filters of the
        intermediate convolution layers.
      use_separable_conv: A `bool` that indicates whether separable
        convolution layers are used.
      num_fcs: An `int` that represents the number of FC layers before
        the predictions.
      fc_dims: An `int` that represents the dimension of the FC
        layers.
class_agnostic_bbox_pred: `bool`, indicating whether bboxes should be
predicted for every class or not.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
super(DetectionHead, self).__init__(**kwargs)
self._config_dict = {
'num_classes': num_classes,
'num_convs': num_convs,
'num_filters': num_filters,
'use_separable_conv': use_separable_conv,
'num_fcs': num_fcs,
'fc_dims': fc_dims,
'class_agnostic_bbox_pred': class_agnostic_bbox_pred,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
}
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation = tf_utils.get_activation(activation)
def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
"""Creates the variables of the head."""
conv_op = (tf.keras.layers.SeparableConv2D
if self._config_dict['use_separable_conv']
else tf.keras.layers.Conv2D)
conv_kwargs = {
'filters': self._config_dict['num_filters'],
'kernel_size': 3,
'padding': 'same',
}
if self._config_dict['use_separable_conv']:
conv_kwargs.update({
'depthwise_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'pointwise_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'depthwise_regularizer': self._config_dict['kernel_regularizer'],
'pointwise_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
})
else:
conv_kwargs.update({
'kernel_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
})
bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
if self._config_dict['use_sync_bn']
else tf.keras.layers.BatchNormalization)
bn_kwargs = {
'axis': self._bn_axis,
'momentum': self._config_dict['norm_momentum'],
'epsilon': self._config_dict['norm_epsilon'],
}
self._convs = []
self._conv_norms = []
for i in range(self._config_dict['num_convs']):
conv_name = 'detection-conv_{}'.format(i)
self._convs.append(conv_op(name=conv_name, **conv_kwargs))
bn_name = 'detection-conv-bn_{}'.format(i)
self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs))
self._fcs = []
self._fc_norms = []
for i in range(self._config_dict['num_fcs']):
fc_name = 'detection-fc_{}'.format(i)
self._fcs.append(
tf.keras.layers.Dense(
units=self._config_dict['fc_dims'],
kernel_initializer=tf.keras.initializers.VarianceScaling(
scale=1 / 3.0, mode='fan_out', distribution='uniform'),
kernel_regularizer=self._config_dict['kernel_regularizer'],
bias_regularizer=self._config_dict['bias_regularizer'],
name=fc_name))
bn_name = 'detection-fc-bn_{}'.format(i)
self._fc_norms.append(bn_op(name=bn_name, **bn_kwargs))
self._classifier = tf.keras.layers.Dense(
units=self._config_dict['num_classes'],
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
bias_initializer=tf.zeros_initializer(),
kernel_regularizer=self._config_dict['kernel_regularizer'],
bias_regularizer=self._config_dict['bias_regularizer'],
name='detection-scores')
num_box_outputs = (4 if self._config_dict['class_agnostic_bbox_pred'] else
self._config_dict['num_classes'] * 4)
self._box_regressor = tf.keras.layers.Dense(
units=num_box_outputs,
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
bias_initializer=tf.zeros_initializer(),
kernel_regularizer=self._config_dict['kernel_regularizer'],
bias_regularizer=self._config_dict['bias_regularizer'],
name='detection-boxes')
super(DetectionHead, self).build(input_shape)

  def call(self, inputs: tf.Tensor, training: bool = None):
    """Forward pass of box and class branches for the Mask-RCNN model.

    Args:
      inputs: A `tf.Tensor` of the shape [batch_size, num_instances,
        roi_height, roi_width, roi_channels], representing the ROI features.
      training: A `bool` indicating whether it is in `training` mode.

    Returns:
      class_outputs: A `tf.Tensor` of the shape
        [batch_size, num_rois, num_classes], representing the class
        predictions.
      box_outputs: A `tf.Tensor` of the shape
        [batch_size, num_rois, num_classes * 4], representing the box
        predictions.
    """
roi_features = inputs
_, num_rois, height, width, filters = roi_features.get_shape().as_list()
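    # Merge the batch and ROI dimensions so the 3x3 convolutions run over
    # each ROI feature map independently.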
x = tf.reshape(roi_features, [-1, height, width, filters])
for conv, bn in zip(self._convs, self._conv_norms):
x = conv(x)
x = bn(x)
x = self._activation(x)
_, _, _, filters = x.get_shape().as_list()
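    # Flatten the spatial dimensions and restore the ROI dimension before the
    # fully connected layers.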
x = tf.reshape(x, [-1, num_rois, height * width * filters])
for fc, bn in zip(self._fcs, self._fc_norms):
x = fc(x)
x = bn(x)
x = self._activation(x)
classes = self._classifier(x)
boxes = self._box_regressor(x)
return classes, boxes

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
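
# A minimal usage sketch of `DetectionHead` (illustrative, not part of the
# library). The ROI shape and class count below are assumptions, and default
# arguments are assumed for the remaining parameters; any
# [batch, num_rois, height, width, channels] input works. With the default
# class-specific box branch, the head returns per-class scores and four box
# coordinates per class:
#
#   head = DetectionHead(num_classes=91)
#   roi_features = tf.ones([2, 100, 7, 7, 256])
#   scores, boxes = head(roi_features)
#   # scores.shape == [2, 100, 91]; boxes.shape == [2, 100, 91 * 4]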


@tf.keras.utils.register_keras_serializable(package='Vision')
class MaskHead(tf.keras.layers.Layer):
  """Creates a mask head."""

  def __init__(
self,
num_classes: int,
upsample_factor: int = 2,
num_convs: int = 4,
num_filters: int = 256,
use_separable_conv: bool = False,
activation: str = 'relu',
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
class_agnostic: bool = False,
**kwargs):
"""Initializes a mask head.
Args:
num_classes: An `int` of the number of classes.
upsample_factor: An `int` that indicates the upsample factor to generate
the final predicted masks. It should be >= 1.
num_convs: An `int` number that represents the number of the intermediate
convolution layers before the mask prediction layers.
num_filters: An `int` number that represents the number of filters of the
intermediate convolution layers.
use_separable_conv: A `bool` that indicates whether the separable
convolution layers is used.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
class_agnostic: A `bool`. If set, we use a single channel mask head that
is shared between all classes.
**kwargs: Additional keyword arguments to be passed.
"""
super(MaskHead, self).__init__(**kwargs)
self._config_dict = {
'num_classes': num_classes,
'upsample_factor': upsample_factor,
'num_convs': num_convs,
'num_filters': num_filters,
'use_separable_conv': use_separable_conv,
'activation': activation,
'use_sync_bn': use_sync_bn,
'norm_momentum': norm_momentum,
'norm_epsilon': norm_epsilon,
'kernel_regularizer': kernel_regularizer,
'bias_regularizer': bias_regularizer,
'class_agnostic': class_agnostic
}
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
"""Creates the variables of the head."""
conv_op = (tf.keras.layers.SeparableConv2D
if self._config_dict['use_separable_conv']
else tf.keras.layers.Conv2D)
conv_kwargs = {
'filters': self._config_dict['num_filters'],
'kernel_size': 3,
'padding': 'same',
}
if self._config_dict['use_separable_conv']:
conv_kwargs.update({
'depthwise_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'pointwise_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'depthwise_regularizer': self._config_dict['kernel_regularizer'],
'pointwise_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
})
else:
conv_kwargs.update({
'kernel_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
})
bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
if self._config_dict['use_sync_bn']
else tf.keras.layers.BatchNormalization)
bn_kwargs = {
'axis': self._bn_axis,
'momentum': self._config_dict['norm_momentum'],
'epsilon': self._config_dict['norm_epsilon'],
}
self._convs = []
self._conv_norms = []
for i in range(self._config_dict['num_convs']):
conv_name = 'mask-conv_{}'.format(i)
self._convs.append(conv_op(name=conv_name, **conv_kwargs))
bn_name = 'mask-conv-bn_{}'.format(i)
self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs))
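    # A transposed convolution whose kernel size equals its stride upsamples
    # the ROI feature map by exactly `upsample_factor`, with no kernel overlap.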
self._deconv = tf.keras.layers.Conv2DTranspose(
filters=self._config_dict['num_filters'],
kernel_size=self._config_dict['upsample_factor'],
strides=self._config_dict['upsample_factor'],
padding='valid',
kernel_initializer=tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
bias_initializer=tf.zeros_initializer(),
kernel_regularizer=self._config_dict['kernel_regularizer'],
bias_regularizer=self._config_dict['bias_regularizer'],
name='mask-upsampling')
self._deconv_bn = bn_op(name='mask-deconv-bn', **bn_kwargs)
if self._config_dict['class_agnostic']:
num_filters = 1
else:
num_filters = self._config_dict['num_classes']
conv_kwargs = {
'filters': num_filters,
'kernel_size': 1,
'padding': 'valid',
}
if self._config_dict['use_separable_conv']:
conv_kwargs.update({
'depthwise_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'pointwise_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'depthwise_regularizer': self._config_dict['kernel_regularizer'],
'pointwise_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
})
else:
conv_kwargs.update({
'kernel_initializer': tf.keras.initializers.VarianceScaling(
scale=2, mode='fan_out', distribution='untruncated_normal'),
'bias_initializer': tf.zeros_initializer(),
'kernel_regularizer': self._config_dict['kernel_regularizer'],
'bias_regularizer': self._config_dict['bias_regularizer'],
})
self._mask_regressor = conv_op(name='mask-logits', **conv_kwargs)
super(MaskHead, self).build(input_shape)

  def call(self, inputs: List[tf.Tensor], training: bool = None):
    """Forward pass of mask branch for the Mask-RCNN model.

    Args:
      inputs: A `list` of two tensors where
        inputs[0]: A `tf.Tensor` of shape [batch_size, num_instances,
          roi_height, roi_width, roi_channels], representing the ROI features.
        inputs[1]: A `tf.Tensor` of shape [batch_size, num_instances],
          representing the classes of the ROIs.
      training: A `bool` indicating whether it is in `training` mode.

    Returns:
      mask_outputs: A `tf.Tensor` of shape
        [batch_size, num_instances, roi_height * upsample_factor,
        roi_width * upsample_factor], representing the mask predictions.
    """
roi_features, roi_classes = inputs
batch_size, num_rois, height, width, filters = (
roi_features.get_shape().as_list())
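    # The static batch size can be unknown at graph-construction time; fall
    # back to the dynamic shape so the gather indices below can be built.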
if batch_size is None:
batch_size = tf.shape(roi_features)[0]
x = tf.reshape(roi_features, [-1, height, width, filters])
for conv, bn in zip(self._convs, self._conv_norms):
x = conv(x)
x = bn(x)
x = self._activation(x)
x = self._deconv(x)
x = self._deconv_bn(x)
x = self._activation(x)
logits = self._mask_regressor(x)
mask_height = height * self._config_dict['upsample_factor']
mask_width = width * self._config_dict['upsample_factor']
if self._config_dict['class_agnostic']:
logits = tf.reshape(logits, [-1, num_rois, mask_height, mask_width, 1])
else:
logits = tf.reshape(
logits,
[-1, num_rois, mask_height, mask_width,
self._config_dict['num_classes']])
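    # Select one mask per ROI: build (batch, roi, class) indices and gather,
    # from the class dimension, the channel matching each ROI's class
    # (or channel 0 when the head is class-agnostic).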
batch_indices = tf.tile(
tf.expand_dims(tf.range(batch_size), axis=1), [1, num_rois])
mask_indices = tf.tile(
tf.expand_dims(tf.range(num_rois), axis=0), [batch_size, 1])
if self._config_dict['class_agnostic']:
class_gather_indices = tf.zeros_like(roi_classes, dtype=tf.int32)
else:
class_gather_indices = tf.cast(roi_classes, dtype=tf.int32)
gather_indices = tf.stack(
[batch_indices, mask_indices, class_gather_indices],
axis=2)
mask_outputs = tf.gather_nd(
tf.transpose(logits, [0, 1, 4, 2, 3]), gather_indices)
return mask_outputs

  def get_config(self):
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    return cls(**config)
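
# A minimal usage sketch of `MaskHead` (illustrative, not part of the
# library); the shapes below are assumptions. The head gathers one mask per
# ROI according to the ROI's class, so the class dimension is absent from the
# output:
#
#   head = MaskHead(num_classes=91, upsample_factor=2)
#   roi_features = tf.ones([2, 100, 14, 14, 256])
#   roi_classes = tf.zeros([2, 100])
#   masks = head([roi_features, roi_classes])
#   # masks.shape == [2, 100, 28, 28]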