Commit e4be7e00 authored by Yeqing Li's avatar Yeqing Li Committed by A. Unique TensorFlower
Browse files

Removes unneeded content of the beta folder.

PiperOrigin-RevId: 437276665
parent f47405b5
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Decoder registers and factory method.
One can register a new decoder model by the following two steps:
1 Import the factory and register the build in the decoder file.
2 Import the decoder class and add a build in __init__.py.
```
# my_decoder.py
from modeling.decoders import factory
class MyDecoder():
...
@factory.register_decoder_builder('my_decoder')
def build_my_decoder():
return MyDecoder()
# decoders/__init__.py adds import
from modeling.decoders.my_decoder import MyDecoder
```
If one wants the MyDecoder class to be used only by certain binaries,
then don't import the decoder module in decoders/__init__.py, but import it
in the place that uses it.
"""
from typing import Any, Callable, Mapping, Optional, Union
# Import libraries
import tensorflow as tf
from official.core import registry
from official.modeling import hyperparams
# Registry that maps a decoder type key (e.g. 'identity') to the builder
# callable registered for it via `register_decoder_builder`.
_REGISTERED_DECODER_CLS = {}
def register_decoder_builder(key: str) -> Callable[..., Any]:
  """Returns a decorator that registers a decoder builder under `key`.

  The decorated builder should be a Callable (a class or a function). It is
  stored in the module-level decoder registry, from which `build_decoder`
  retrieves it using `model_config.decoder.type` and calls it with the
  keyword arguments `input_specs`, `model_config` and `l2_regularizer`.

  Example:

  ```
  class MyDecoder(tf.keras.Model):
    pass

  @register_decoder_builder('mydecoder')
  def builder(input_specs, model_config, l2_regularizer):
    return MyDecoder(...)

  # Builds a MyDecoder object (with model_config.decoder.type == 'mydecoder').
  my_decoder = build_decoder(input_specs, model_config, l2_regularizer)
  ```

  Args:
    key: A `str` of key to look up the builder.

  Returns:
    A callable to be used as a decorator; it registers the decorated builder
    under `key`.
  """
  decorator = registry.register(_REGISTERED_DECODER_CLS, key)
  return decorator
@register_decoder_builder('identity')
def build_identity(
    input_specs: Optional[Mapping[str, tf.TensorShape]] = None,
    model_config: Optional[hyperparams.Config] = None,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None) -> None:
  """Builds the identity decoder, which is represented by `None`.

  None of the input arguments is used by the identity decoder; they are kept
  only so that this builder has the same interface as the other registered
  decoder builders.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A `OneOfConfig` of model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
      None.

  Returns:
    Always `None`; there is no decoder object to construct.
  """
  # The arguments exist purely for interface parity with other builders.
  del input_specs, model_config, l2_regularizer
  return None
def build_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: tf.keras.regularizers.Regularizer = None,
    **kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:  # pytype: disable=annotation-type-mismatch  # typed-keras
  """Builds the decoder selected by `model_config.decoder.type`.

  The result can be a keras.Model, a keras.layers.Layer, or None. When it is
  not None, the decoder takes features from the backbone as input and produces
  decoded feature maps. When it is None (e.g. the identity decoder), the
  decoder is skipped and the backbone features are regarded as model output.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A `OneOfConfig` of model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
      None.
    **kwargs: Additional keyword args to be passed to decoder builder.

  Returns:
    An instance of the decoder.
  """
  decoder_type = model_config.decoder.type
  # Fetch the builder that was registered for this decoder type.
  build_fn = registry.lookup(_REGISTERED_DECODER_CLS, decoder_type)
  decoder = build_fn(
      input_specs=input_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer,
      **kwargs)
  return decoder
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for decoder factory functions."""
from absl.testing import parameterized
import tensorflow as tf
from tensorflow.python.distribute import combinations
from official.vision.beta import configs
from official.vision.beta.configs import decoders as decoders_cfg
from official.vision.beta.modeling import decoders
from official.vision.beta.modeling.decoders import factory
class FactoryTest(tf.test.TestCase, parameterized.TestCase):
  """Checks that factory-built decoders match directly constructed ones."""

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256], use_separable_conv=[True, False]))
  def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
    """Test creation of FPN decoder."""
    min_level, max_level = 3, 7
    input_specs = {
        str(level): tf.TensorShape(
            [1, 128 // (2**level), 128 // (2**level), 3])
        for level in range(min_level, max_level)
    }

    # Reference decoder constructed directly from the class.
    network = decoders.FPN(
        input_specs=input_specs,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    # Equivalent decoder described through the model config.
    fpn_cfg = decoders_cfg.FPN(
        num_filters=num_filters, use_separable_conv=use_separable_conv)
    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(type='fpn', fpn=fpn_cfg)

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()
    self.assertEqual(network_config, factory_network_config)

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          num_repeats=[3, 5],
          use_separable_conv=[True, False]))
  def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
                                   use_separable_conv):
    """Test creation of NASFPN decoder."""
    min_level, max_level = 3, 7
    input_specs = {
        str(level): tf.TensorShape(
            [1, 128 // (2**level), 128 // (2**level), 3])
        for level in range(min_level, max_level)
    }

    # Reference decoder constructed directly from the class.
    network = decoders.NASFPN(
        input_specs=input_specs,
        num_filters=num_filters,
        num_repeats=num_repeats,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    # Equivalent decoder described through the model config.
    nasfpn_cfg = decoders_cfg.NASFPN(
        num_filters=num_filters,
        num_repeats=num_repeats,
        use_separable_conv=use_separable_conv)
    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='nasfpn', nasfpn=nasfpn_cfg)

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()
    self.assertEqual(network_config, factory_network_config)

  @combinations.generate(
      combinations.combine(
          level=[3, 4],
          dilation_rates=[[6, 12, 18], [6, 12]],
          num_filters=[128, 256]))
  def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
    """Test creation of ASPP decoder."""
    input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}

    # Reference decoder constructed directly from the class.
    network = decoders.ASPP(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        use_sync_bn=True)

    # Equivalent decoder described through the model config.
    aspp_cfg = decoders_cfg.ASPP(
        level=level, dilation_rates=dilation_rates, num_filters=num_filters)
    model_config = configs.semantic_segmentation.SemanticSegmentationModel()
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(type='aspp', aspp=aspp_cfg)

    factory_network = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    network_config = network.get_config()
    factory_network_config = factory_network.get_config()
    # Due to calling `super().get_config()` in aspp layer, everything but the
    # name of the two layer instances is the same, so we force equal names so
    # that it will not give a false alarm.
    factory_network_config['name'] = network_config['name']
    self.assertEqual(network_config, factory_network_config)

  def test_identity_decoder_creation(self):
    """Test creation of identity decoder."""
    identity_cfg = decoders_cfg.Identity()
    model_config = configs.retinanet.RetinaNet()
    model_config.num_classes = 2
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='identity', identity=identity_cfg)

    factory_network = factory.build_decoder(
        input_specs=None, model_config=model_config)

    # The identity decoder is represented by `None`.
    self.assertIsNone(factory_network)
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of Feature Pyramid Networks (FPN)."""
from typing import Any, Mapping, Optional
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.beta.modeling.decoders import factory
from official.vision.beta.ops import spatial_transform_ops
@tf.keras.utils.register_keras_serializable(package='Beta')
class FPN(tf.keras.Model):
  """Creates a Feature Pyramid Network (FPN).

  This implements the paper:
  Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan, and
  Serge Belongie.
  Feature Pyramid Networks for Object Detection.
  (https://arxiv.org/pdf/1612.03144)
  """

  def __init__(
      self,
      input_specs: Mapping[str, tf.TensorShape],
      min_level: int = 3,
      max_level: int = 7,
      num_filters: int = 256,
      fusion_type: str = 'sum',
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a Feature Pyramid Network (FPN).

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone. Level keys are strings of
        integers, e.g. '3'.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      num_filters: An `int` number of filters in FPN layers.
      fusion_type: A `str` of `sum` or `concat`. Whether performing sum or
        concat for feature fusion.
      use_separable_conv: A `bool`. If True use separable convolution for
        convolution in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.

    Raises:
      ValueError: If the lowest backbone level is greater than `min_level`, or
        if `fusion_type` is neither `sum` nor `concat`.
    """
    # Stored verbatim so that get_config()/from_config() can round-trip.
    self._config_dict = {
        'input_specs': input_specs,
        'min_level': min_level,
        'max_level': max_level,
        'num_filters': num_filters,
        'fusion_type': fusion_type,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    if use_separable_conv:
      conv2d = tf.keras.layers.SeparableConv2D
    else:
      conv2d = tf.keras.layers.Conv2D
    if use_sync_bn:
      norm = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm = tf.keras.layers.BatchNormalization
    activation_fn = tf.keras.layers.Activation(
        tf_utils.get_activation(activation))

    # Pick the channel axis that batch normalization operates on.
    if tf.keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Get input feature pyramid from backbone.
    logging.info('FPN input_specs: %s', input_specs)
    inputs = self._build_input_pyramid(input_specs, min_level)
    # Levels are compared as integers: comparing the string keys would order
    # them lexicographically (e.g. '9' > '10').
    backbone_max_level = min(max(int(level) for level in inputs), max_level)

    # Build lateral connections.
    feats_lateral = {}
    for level in range(min_level, backbone_max_level + 1):
      feats_lateral[str(level)] = conv2d(
          filters=num_filters,
          kernel_size=1,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(
              inputs[str(level)])

    # Build top-down path.
    feats = {str(backbone_max_level): feats_lateral[str(backbone_max_level)]}
    for level in range(backbone_max_level - 1, min_level - 1, -1):
      feat_a = spatial_transform_ops.nearest_upsampling(
          feats[str(level + 1)], 2)
      feat_b = feats_lateral[str(level)]

      if fusion_type == 'sum':
        feats[str(level)] = feat_a + feat_b
      elif fusion_type == 'concat':
        feats[str(level)] = tf.concat([feat_a, feat_b], axis=-1)
      else:
        raise ValueError('Fusion type {} not supported.'.format(fusion_type))

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build post-hoc 3x3 convolution kernel.
    for level in range(min_level, backbone_max_level + 1):
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=1,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(
              feats[str(level)])

    # TODO(xianzhi): consider to remove bias in conv2d.
    # Build coarser FPN levels introduced for RetinaNet.
    for level in range(backbone_max_level + 1, max_level + 1):
      feats_in = feats[str(level - 1)]
      # The first coarser level consumes the previous level directly; every
      # later one applies the activation to its input first.
      if level > backbone_max_level + 1:
        feats_in = activation_fn(feats_in)
      feats[str(level)] = conv2d(
          filters=num_filters,
          strides=2,
          kernel_size=3,
          padding='same',
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer)(
              feats_in)

    # Apply batch norm layers.
    for level in range(min_level, max_level + 1):
      feats[str(level)] = norm(
          axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)(
              feats[str(level)])

    self._output_specs = {
        str(level): feats[str(level)].get_shape()
        for level in range(min_level, max_level + 1)
    }

    super(FPN, self).__init__(inputs=inputs, outputs=feats, **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
    """Creates one `tf.keras.Input` per backbone level.

    Args:
      input_specs: A `dict` of {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.

    Returns:
      A `dict` of {level: tf.keras.Input}, where each input drops the leading
      (batch) dimension of the corresponding spec.

    Raises:
      ValueError: If the lowest backbone level is greater than `min_level`.
    """
    assert isinstance(input_specs, dict)
    # Compare levels numerically; as strings, '10' would sort before '3'.
    if min(int(level) for level in input_specs) > min_level:
      raise ValueError(
          'Backbone min level should be less or equal to FPN min level')

    return {
        level: tf.keras.Input(shape=spec[1:])
        for level, spec in input_specs.items()
    }

  def get_config(self) -> Mapping[str, Any]:
    """Returns the constructor arguments this model was built with."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds a model from a config produced by `get_config`."""
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
@factory.register_decoder_builder('fpn')
def build_fpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Constructs an FPN decoder as described by `model_config`.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the FPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `fpn`.
  """
  decoder_type = model_config.decoder.type
  fpn_cfg = model_config.decoder.get()
  if decoder_type != 'fpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `fpn`.')

  norm_cfg = model_config.norm_activation
  # Collect the constructor arguments from the three config sources: the model
  # config, the decoder-specific config and the norm/activation config.
  fpn_kwargs = dict(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=fpn_cfg.num_filters,
      fusion_type=fpn_cfg.fusion_type,
      use_separable_conv=fpn_cfg.use_separable_conv,
      activation=norm_cfg.activation,
      use_sync_bn=norm_cfg.use_sync_bn,
      norm_momentum=norm_cfg.norm_momentum,
      norm_epsilon=norm_cfg.norm_epsilon,
      kernel_regularizer=l2_regularizer)
  return FPN(**fpn_kwargs)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.modeling.backbones import mobilenet
from official.vision.beta.modeling.backbones import resnet
from official.vision.beta.modeling.decoders import fpn
class FPNTest(parameterized.TestCase, tf.test.TestCase):
  """Tests FPN construction, output shapes and config round-tripping."""

  @parameterized.parameters(
      (256, 3, 7, False, 'sum'),
      (256, 3, 7, True, 'concat'),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv, fusion_type):
    """Test creation of FPN."""
    tf.keras.backend.set_image_data_format('channels_last')

    image = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    backbone = resnet.ResNet(model_id=50)
    network = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        fusion_type=fusion_type,
        use_separable_conv=use_separable_conv)

    feats = network(backbone(image))

    for level in range(min_level, max_level + 1):
      key = str(level)
      # Each level halves the spatial size of the one below it.
      side = input_size // 2**level
      self.assertIn(key, feats)
      self.assertAllEqual([1, side, side, 256], feats[key].shape.as_list())

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation_with_mobilenet(self, input_size, min_level,
                                           max_level, use_separable_conv):
    """Test creation of FPN with mobilenet backbone."""
    tf.keras.backend.set_image_data_format('channels_last')

    image = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

    backbone = mobilenet.MobileNet(model_id='MobileNetV2')
    network = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        use_separable_conv=use_separable_conv)

    feats = network(backbone(image))

    for level in range(min_level, max_level + 1):
      key = str(level)
      # Each level halves the spatial size of the one below it.
      side = input_size // 2**level
      self.assertIn(key, feats)
      self.assertAllEqual([1, side, side, 256], feats[key].shape.as_list())

  def test_serialize_deserialize(self):
    """Test that get_config/from_config round-trip the FPN settings."""
    # Create a network object that sets all of its config options.
    kwargs = {
        'input_specs': resnet.ResNet(model_id=50).output_specs,
        'min_level': 3,
        'max_level': 7,
        'num_filters': 256,
        'fusion_type': 'sum',
        'use_separable_conv': False,
        'use_sync_bn': False,
        'activation': 'relu',
        'norm_momentum': 0.99,
        'norm_epsilon': 0.001,
        'kernel_initializer': 'VarianceScaling',
        'kernel_regularizer': None,
        'bias_regularizer': None,
    }
    network = fpn.FPN(**kwargs)

    expected_config = kwargs.copy()
    self.assertEqual(network.get_config(), expected_config)

    # Create another network object from the first object's config.
    new_network = fpn.FPN.from_config(network.get_config())

    # Validate that the config can be forced to JSON.
    _ = new_network.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(network.get_config(), new_network.get_config())
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of NAS-FPN."""
from typing import Any, List, Mapping, Optional, Tuple
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.beta.modeling.decoders import factory
from official.vision.beta.ops import spatial_transform_ops
# The fixed NAS-FPN architecture discovered by NAS.
# Each element represents a specification of a building block:
#   (block_level, combine_fn, (input_offset0, input_offset1), is_output).
# As consumed by `NASFPN._build_feature_pyramid`:
#   * block_level: the pyramid level both inputs are resampled to.
#   * combine_fn: 'sum' or 'attention'; how the two inputs are merged.
#   * input_offsets: indices into the running feature list, which starts with
#     one feature per level from min_level to max_level and grows by one entry
#     for every block built.
#   * is_output: if True, features at the same level that no block has
#     consumed yet are also added into this block's result.
NASFPN_BLOCK_SPECS = [
    (4, 'attention', (1, 3), False),
    (4, 'sum', (1, 5), False),
    (3, 'sum', (0, 6), True),
    (4, 'sum', (6, 7), True),
    (5, 'attention', (7, 8), True),
    (7, 'attention', (6, 9), True),
    (6, 'attention', (9, 10), True),
]
class BlockSpec:
  """A container class that specifies the block configuration for NAS-FPN.

  Attributes:
    level: An `int` pyramid level of the feature map the block produces.
    combine_fn: A `str` naming how the two inputs are merged.
    input_offsets: A tuple of two `int` offsets selecting the block's inputs.
    is_output: A `bool` marking whether the block is an output block.
  """

  def __init__(self, level: int, combine_fn: str,
               input_offsets: Tuple[int, int], is_output: bool):
    """Stores the given block settings as attributes, unchanged."""
    self.level = level
    self.combine_fn = combine_fn
    self.input_offsets = input_offsets
    self.is_output = is_output

  def __repr__(self) -> str:
    """Returns a constructor-like string, so specs are readable in logs."""
    return (f'{type(self).__name__}(level={self.level!r}, '
            f'combine_fn={self.combine_fn!r}, '
            f'input_offsets={self.input_offsets!r}, '
            f'is_output={self.is_output!r})')
def build_block_specs(
    block_specs: Optional[List[Tuple[Any, ...]]] = None) -> List[BlockSpec]:
  """Converts raw spec tuples into a list of BlockSpec objects for NAS-FPN.

  Args:
    block_specs: An optional list of tuples, each holding the positional
      arguments of `BlockSpec`. When it is None or empty, the module-level
      `NASFPN_BLOCK_SPECS` is used instead.

  Returns:
    A list with one `BlockSpec` per input tuple, in the same order.
  """
  raw_specs = block_specs or NASFPN_BLOCK_SPECS
  logging.info('Building NAS-FPN block specs: %s', raw_specs)
  return [BlockSpec(*spec_args) for spec_args in raw_specs]
@tf.keras.utils.register_keras_serializable(package='Beta')
class NASFPN(tf.keras.Model):
  """Creates a NAS-FPN model.

  This implements the paper:
  Golnaz Ghiasi, Tsung-Yi Lin, Ruoming Pang, Quoc V. Le.
  NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection.
  (https://arxiv.org/abs/1904.07392)
  """

  def __init__(
      self,
      input_specs: Mapping[str, tf.TensorShape],
      min_level: int = 3,
      max_level: int = 7,
      block_specs: Optional[List[BlockSpec]] = None,
      num_filters: int = 256,
      num_repeats: int = 5,
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_initializer: str = 'VarianceScaling',
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a NAS-FPN model.

    Args:
      input_specs: A `dict` of input specifications. A dictionary consists of
        {level: TensorShape} from a backbone. Level keys are strings of
        integers, e.g. '3'.
      min_level: An `int` of minimum level in FPN output feature maps.
      max_level: An `int` of maximum level in FPN output feature maps.
      block_specs: a list of BlockSpec objects that specifies the NAS-FPN
        network topology. If None (the default), the previously discovered
        architecture from `build_block_specs()` is used.
      num_filters: An `int` number of filters in FPN layers.
      num_repeats: number of repeats for feature pyramid network. Must be at
        least 1.
      use_separable_conv: A `bool`. If True use separable convolution for
        convolution in FPN layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool`. If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_initializer: A `str` name of kernel_initializer for convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
      **kwargs: Additional keyword arguments to be passed.

    Raises:
      ValueError: If `num_repeats` is less than 1, if the lowest backbone level
        is greater than `min_level`, or if a block spec is invalid.
    """
    # Without at least one repeat no output pyramid would ever be produced.
    if num_repeats < 1:
      raise ValueError(
          'num_repeats must be at least 1, got {}.'.format(num_repeats))

    # Stored verbatim so that get_config()/from_config() can round-trip.
    # NOTE: `block_specs` is not part of the config.
    self._config_dict = {
        'input_specs': input_specs,
        'min_level': min_level,
        'max_level': max_level,
        'num_filters': num_filters,
        'num_repeats': num_repeats,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_initializer': kernel_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }
    self._min_level = min_level
    self._max_level = max_level
    # Build the default topology per instance rather than in the signature, so
    # nothing runs at class-definition time and no list is shared.
    self._block_specs = (
        block_specs if block_specs is not None else build_block_specs())
    self._num_repeats = num_repeats
    self._conv_op = (tf.keras.layers.SeparableConv2D
                     if self._config_dict['use_separable_conv']
                     else tf.keras.layers.Conv2D)
    if self._config_dict['use_separable_conv']:
      self._conv_kwargs = {
          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      }
    else:
      self._conv_kwargs = {
          'kernel_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      }
    self._norm_op = (tf.keras.layers.experimental.SyncBatchNormalization
                     if self._config_dict['use_sync_bn']
                     else tf.keras.layers.BatchNormalization)
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._norm_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }
    self._activation = tf_utils.get_activation(activation)

    # Gets input feature pyramid from backbone.
    inputs = self._build_input_pyramid(input_specs, min_level)

    # Projects the input features. Levels the backbone does not provide are
    # derived by resampling the previously projected (next lower) level.
    feats = []
    for level in range(self._min_level, self._max_level + 1):
      if str(level) in inputs:
        feats.append(self._resample_feature_map(
            inputs[str(level)], level, level, self._config_dict['num_filters']))
      else:
        feats.append(self._resample_feature_map(
            feats[-1], level - 1, level, self._config_dict['num_filters']))

    # Repeatedly builds the NAS-FPN modules; each repeat consumes the output
    # pyramid of the previous one.
    for _ in range(self._num_repeats):
      output_feats = self._build_feature_pyramid(feats)
      feats = [output_feats[level]
               for level in range(self._min_level, self._max_level + 1)]

    self._output_specs = {
        str(level): output_feats[level].get_shape()
        for level in range(min_level, max_level + 1)
    }
    # The model outputs are keyed by the level as a string.
    output_feats = {str(level): feat for level, feat in output_feats.items()}
    super(NASFPN, self).__init__(inputs=inputs, outputs=output_feats, **kwargs)

  def _build_input_pyramid(self, input_specs: Mapping[str, tf.TensorShape],
                           min_level: int):
    """Creates one `tf.keras.Input` per backbone level.

    Args:
      input_specs: A `dict` of {level: TensorShape} from a backbone.
      min_level: An `int` of minimum level in FPN output feature maps.

    Returns:
      A `dict` of {level: tf.keras.Input}, where each input drops the leading
      (batch) dimension of the corresponding spec.

    Raises:
      ValueError: If the lowest backbone level is greater than `min_level`.
    """
    assert isinstance(input_specs, dict)
    # Compare levels numerically; as strings, '10' would sort before '3'.
    if min(int(level) for level in input_specs) > min_level:
      raise ValueError(
          'Backbone min level should be less or equal to FPN min level')

    return {
        level: tf.keras.Input(shape=spec[1:])
        for level, spec in input_specs.items()
    }

  def _resample_feature_map(self,
                            inputs,
                            input_level,
                            target_level,
                            target_num_filters=256):
    """Brings a feature map to the target filter count and pyramid level.

    Args:
      inputs: A rank-4 feature map whose last dimension is the filter count.
      input_level: An `int` level of `inputs`.
      target_level: An `int` level to resample to.
      target_num_filters: An `int` number of filters of the result.

    Returns:
      The feature map, projected with a 1x1 convolution plus normalization if
      the filter count differs, then max-pooled (to a higher level) or
      nearest-neighbor upsampled (to a lower level) by a power-of-two factor.
    """
    x = inputs
    _, _, _, input_num_filters = x.get_shape().as_list()
    if input_num_filters != target_num_filters:
      x = self._conv_op(
          filters=target_num_filters,
          kernel_size=1,
          padding='same',
          **self._conv_kwargs)(x)
      x = self._norm_op(**self._norm_kwargs)(x)

    if input_level < target_level:
      stride = int(2 ** (target_level - input_level))
      return tf.keras.layers.MaxPool2D(
          pool_size=stride, strides=stride, padding='same')(x)
    if input_level > target_level:
      scale = int(2 ** (input_level - target_level))
      return spatial_transform_ops.nearest_upsampling(x, scale=scale)

    # Force output x to be the same dtype as mixed precision policy. This avoids
    # dtype mismatch when one input (by default float32 dtype) does not meet all
    # the above conditions and is output unchanged, while other inputs are
    # processed to have different dtype, e.g., using bfloat16 on TPU.
    compute_dtype = tf.keras.layers.Layer().dtype_policy.compute_dtype
    if (compute_dtype is not None) and (x.dtype != compute_dtype):
      return tf.cast(x, dtype=compute_dtype)
    else:
      return x

  def _global_attention(self, feat0, feat1):
    """Adds `feat1`, gated by a sigmoid of `feat0`'s spatial max, to `feat0`."""
    m = tf.math.reduce_max(feat0, axis=[1, 2], keepdims=True)
    m = tf.math.sigmoid(m)
    return feat0 + feat1 * m

  def _build_feature_pyramid(self, feats):
    """Applies every block spec once and returns the resulting pyramid.

    Args:
      feats: A list of feature maps, one per level from min_level to max_level
        in increasing order. The list is extended in place with one new
        feature map per block.

    Returns:
      A `dict` of {level (int): feature map}, taken from the last
      `max_level - min_level + 1` entries of the extended feature list.

    Raises:
      ValueError: If a block refers to a feature that does not exist yet or
        uses an unknown `combine_fn`.
    """
    # Counts how many blocks have consumed each entry of `feats`.
    num_output_connections = [0] * len(feats)
    num_output_levels = self._max_level - self._min_level + 1
    feat_levels = list(range(self._min_level, self._max_level + 1))

    for block_spec in self._block_specs:
      new_level = block_spec.level

      # Checks the range of input_offsets.
      for input_offset in block_spec.input_offsets:
        if input_offset >= len(feats):
          raise ValueError(
              'input_offset ({}) is larger than num feats({})'.format(
                  input_offset, len(feats)))
      input0 = block_spec.input_offsets[0]
      input1 = block_spec.input_offsets[1]

      # Update graph with inputs.
      node0 = feats[input0]
      node0_level = feat_levels[input0]
      num_output_connections[input0] += 1
      node0 = self._resample_feature_map(node0, node0_level, new_level)
      node1 = feats[input1]
      node1_level = feat_levels[input1]
      num_output_connections[input1] += 1
      node1 = self._resample_feature_map(node1, node1_level, new_level)

      # Combine node0 and node1 to create new feat.
      if block_spec.combine_fn == 'sum':
        new_node = node0 + node1
      elif block_spec.combine_fn == 'attention':
        # The input from the higher level provides the attention weights.
        if node0_level >= node1_level:
          new_node = self._global_attention(node0, node1)
        else:
          new_node = self._global_attention(node1, node0)
      else:
        raise ValueError('unknown combine_fn `{}`.'
                         .format(block_spec.combine_fn))

      # Add intermediate nodes that do not have any connections to output.
      if block_spec.is_output:
        for j, (feat, feat_level, num_output) in enumerate(
            zip(feats, feat_levels, num_output_connections)):
          if num_output == 0 and feat_level == new_level:
            num_output_connections[j] += 1

            feat_ = self._resample_feature_map(feat, feat_level, new_level)
            new_node += feat_

      new_node = self._activation(new_node)
      new_node = self._conv_op(
          filters=self._config_dict['num_filters'],
          kernel_size=(3, 3),
          padding='same',
          **self._conv_kwargs)(new_node)
      new_node = self._norm_op(**self._norm_kwargs)(new_node)

      feats.append(new_node)
      feat_levels.append(new_level)
      num_output_connections.append(0)

    # The last `num_output_levels` features form the output pyramid.
    output_feats = {}
    for i in range(len(feats) - num_output_levels, len(feats)):
      level = feat_levels[i]
      output_feats[level] = feats[i]
    logging.info('Output feature pyramid: %s', output_feats)
    return output_feats

  def get_config(self) -> Mapping[str, Any]:
    """Returns the constructor arguments stored at construction time."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds a model from a config produced by `get_config`."""
    return cls(**config)

  @property
  def output_specs(self) -> Mapping[str, tf.TensorShape]:
    """A dict of {level: TensorShape} pairs for the model output."""
    return self._output_specs
@factory.register_decoder_builder('nasfpn')
def build_nasfpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Constructs a NASFPN decoder as described by `model_config`.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.

  Returns:
    A `tf.keras.Model` instance of the NASFPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `nasfpn`.
  """
  decoder_type = model_config.decoder.type
  nasfpn_cfg = model_config.decoder.get()
  if decoder_type != 'nasfpn':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `nasfpn`.')

  norm_cfg = model_config.norm_activation
  # Collect the constructor arguments from the three config sources: the model
  # config, the decoder-specific config and the norm/activation config.
  nasfpn_kwargs = dict(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=nasfpn_cfg.num_filters,
      num_repeats=nasfpn_cfg.num_repeats,
      use_separable_conv=nasfpn_cfg.use_separable_conv,
      activation=norm_cfg.activation,
      use_sync_bn=norm_cfg.use_sync_bn,
      norm_momentum=norm_cfg.norm_momentum,
      norm_epsilon=norm_cfg.norm_epsilon,
      kernel_regularizer=l2_regularizer)
  return NASFPN(**nasfpn_kwargs)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for NAS-FPN."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.modeling.backbones import resnet
from official.vision.beta.modeling.decoders import nasfpn
class NASFPNTest(parameterized.TestCase, tf.test.TestCase):
  """Checks the per-level outputs of a NAS-FPN built on a ResNet backbone."""

  @parameterized.parameters(
      (256, 3, 7, False),
      (256, 3, 7, True),
  )
  def test_network_creation(self, input_size, min_level, max_level,
                            use_separable_conv):
    """Test creation of NAS-FPN and the shape of every output level."""
    tf.keras.backend.set_image_data_format('channels_last')

    image = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
    num_filters = 256

    backbone = resnet.ResNet(model_id=50)
    network = nasfpn.NASFPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv)

    feats = network(backbone(image))

    for level in range(min_level, max_level + 1):
      level_key = str(level)
      # Each level halves the spatial resolution of the previous one.
      side = input_size // 2**level
      self.assertIn(level_key, feats)
      self.assertAllEqual(
          [1, side, side, num_filters],
          feats[level_key].shape.as_list())
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
from typing import Optional
import tensorflow as tf
from official.vision.beta.configs import image_classification as classification_cfg
from official.vision.beta.configs import maskrcnn as maskrcnn_cfg
from official.vision.beta.configs import retinanet as retinanet_cfg
from official.vision.beta.configs import semantic_segmentation as segmentation_cfg
from official.vision.beta.modeling import backbones
from official.vision.beta.modeling import classification_model
from official.vision.beta.modeling import decoders
from official.vision.beta.modeling import maskrcnn_model
from official.vision.beta.modeling import retinanet_model
from official.vision.beta.modeling import segmentation_model
from official.vision.beta.modeling.heads import dense_prediction_heads
from official.vision.beta.modeling.heads import instance_heads
from official.vision.beta.modeling.heads import segmentation_heads
from official.vision.beta.modeling.layers import detection_generator
from official.vision.beta.modeling.layers import mask_sampler
from official.vision.beta.modeling.layers import roi_aligner
from official.vision.beta.modeling.layers import roi_generator
from official.vision.beta.modeling.layers import roi_sampler
def build_classification_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: classification_cfg.ImageClassificationModel,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    skip_logits_layer: bool = False,
    backbone: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Assembles an image classification model from its configuration.

  Args:
    input_specs: Input specification forwarded to the backbone builder and to
      the classification model.
    model_config: Image classification model configuration.
    l2_regularizer: Optional regularizer passed to the backbone builder and
      used as the model's kernel regularizer.
    skip_logits_layer: Forwarded unchanged to `ClassificationModel`.
    backbone: Optional pre-built backbone. When falsy, one is built from
      `model_config.backbone`.

  Returns:
    A `classification_model.ClassificationModel` instance.
  """
  norm_cfg = model_config.norm_activation

  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_cfg,
        l2_regularizer=l2_regularizer)

  classifier_kwargs = dict(
      backbone=backbone,
      num_classes=model_config.num_classes,
      input_specs=input_specs,
      dropout_rate=model_config.dropout_rate,
      kernel_initializer=model_config.kernel_initializer,
      kernel_regularizer=l2_regularizer,
      add_head_batch_norm=model_config.add_head_batch_norm,
      use_sync_bn=norm_cfg.use_sync_bn,
      norm_momentum=norm_cfg.norm_momentum,
      norm_epsilon=norm_cfg.norm_epsilon,
      skip_logits_layer=skip_logits_layer)
  return classification_model.ClassificationModel(**classifier_kwargs)
def build_maskrcnn(input_specs: tf.keras.layers.InputSpec,
                   model_config: maskrcnn_cfg.MaskRCNN,
                   l2_regularizer: Optional[
                       tf.keras.regularizers.Regularizer] = None,
                   backbone: Optional[tf.keras.Model] = None,
                   decoder: Optional[tf.keras.Model] = None) -> tf.keras.Model:
  """Assembles a Mask R-CNN model from its configuration.

  Args:
    input_specs: Input specification; `input_specs.shape[1:]` is used to create
      the symbolic input that is passed through the backbone.
    model_config: Mask R-CNN model configuration.
    l2_regularizer: Optional regularizer handed to the backbone and decoder
      builders and used as kernel regularizer of the heads.
    backbone: Optional pre-built backbone. When falsy, one is built from
      `model_config.backbone`.
    decoder: Optional pre-built decoder. When falsy, one is requested from the
      decoder factory.

  Returns:
    A `maskrcnn_model.MaskRCNNModel` instance.
  """
  norm_cfg = model_config.norm_activation

  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_cfg,
        l2_regularizer=l2_regularizer)
  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  rpn_cfg = model_config.rpn_head
  proposal_cfg = model_config.roi_generator
  sampler_cfg = model_config.roi_sampler
  aligner_cfg = model_config.roi_aligner
  det_head_cfg = model_config.detection_head
  det_generator_cfg = model_config.detection_generator

  def _make_detection_head(head_name):
    """Creates one detection head; only the layer name differs per stage."""
    return instance_heads.DetectionHead(
        num_classes=model_config.num_classes,
        num_convs=det_head_cfg.num_convs,
        num_filters=det_head_cfg.num_filters,
        use_separable_conv=det_head_cfg.use_separable_conv,
        num_fcs=det_head_cfg.num_fcs,
        fc_dims=det_head_cfg.fc_dims,
        class_agnostic_bbox_pred=det_head_cfg.class_agnostic_bbox_pred,
        activation=norm_cfg.activation,
        use_sync_bn=norm_cfg.use_sync_bn,
        norm_momentum=norm_cfg.norm_momentum,
        norm_epsilon=norm_cfg.norm_epsilon,
        kernel_regularizer=l2_regularizer,
        name=head_name)

  num_anchors_per_location = (
      len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)

  rpn_head = dense_prediction_heads.RPNHead(
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=rpn_cfg.num_convs,
      num_filters=rpn_cfg.num_filters,
      use_separable_conv=rpn_cfg.use_separable_conv,
      activation=norm_cfg.activation,
      use_sync_bn=norm_cfg.use_sync_bn,
      norm_momentum=norm_cfg.norm_momentum,
      norm_epsilon=norm_cfg.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  detection_head = _make_detection_head('detection_head')

  if decoder:
    # Run the decoder and the RPN head once on the symbolic backbone features.
    rpn_head(decoder(backbone_features))

  # With cascade thresholds configured, `detection_head` becomes a list: the
  # base head followed by one extra head per threshold.
  if sampler_cfg.cascade_iou_thresholds:
    cascade_heads = [detection_head]
    for stage in range(len(sampler_cfg.cascade_iou_thresholds)):
      cascade_heads.append(
          _make_detection_head('detection_head_{}'.format(stage + 1)))
    detection_head = cascade_heads

  roi_generator_obj = roi_generator.MultilevelROIGenerator(
      pre_nms_top_k=proposal_cfg.pre_nms_top_k,
      pre_nms_score_threshold=proposal_cfg.pre_nms_score_threshold,
      pre_nms_min_size_threshold=proposal_cfg.pre_nms_min_size_threshold,
      nms_iou_threshold=proposal_cfg.nms_iou_threshold,
      num_proposals=proposal_cfg.num_proposals,
      test_pre_nms_top_k=proposal_cfg.test_pre_nms_top_k,
      test_pre_nms_score_threshold=proposal_cfg.test_pre_nms_score_threshold,
      test_pre_nms_min_size_threshold=(
          proposal_cfg.test_pre_nms_min_size_threshold),
      test_nms_iou_threshold=proposal_cfg.test_nms_iou_threshold,
      test_num_proposals=proposal_cfg.test_num_proposals,
      use_batched_nms=proposal_cfg.use_batched_nms)

  roi_samplers = [
      roi_sampler.ROISampler(
          mix_gt_boxes=sampler_cfg.mix_gt_boxes,
          num_sampled_rois=sampler_cfg.num_sampled_rois,
          foreground_fraction=sampler_cfg.foreground_fraction,
          foreground_iou_threshold=sampler_cfg.foreground_iou_threshold,
          background_iou_high_threshold=(
              sampler_cfg.background_iou_high_threshold),
          background_iou_low_threshold=(
              sampler_cfg.background_iou_low_threshold))
  ]
  # Initialize additional ROI samplers for cascade heads, one per threshold.
  if sampler_cfg.cascade_iou_thresholds:
    for iou_threshold in sampler_cfg.cascade_iou_thresholds:
      roi_samplers.append(
          roi_sampler.ROISampler(
              mix_gt_boxes=False,
              num_sampled_rois=sampler_cfg.num_sampled_rois,
              foreground_iou_threshold=iou_threshold,
              background_iou_high_threshold=iou_threshold,
              background_iou_low_threshold=0.0,
              skip_subsampling=True))

  roi_aligner_obj = roi_aligner.MultilevelROIAligner(
      crop_size=aligner_cfg.crop_size,
      sample_offset=aligner_cfg.sample_offset)

  detection_generator_obj = detection_generator.DetectionGenerator(
      apply_nms=det_generator_cfg.apply_nms,
      pre_nms_top_k=det_generator_cfg.pre_nms_top_k,
      pre_nms_score_threshold=det_generator_cfg.pre_nms_score_threshold,
      nms_iou_threshold=det_generator_cfg.nms_iou_threshold,
      max_num_detections=det_generator_cfg.max_num_detections,
      nms_version=det_generator_cfg.nms_version,
      use_cpu_nms=det_generator_cfg.use_cpu_nms,
      soft_nms_sigma=det_generator_cfg.soft_nms_sigma)

  # The mask branch is optional; all three parts stay None when it is disabled.
  mask_head = None
  mask_sampler_obj = None
  mask_roi_aligner_obj = None
  if model_config.include_mask:
    mask_head = instance_heads.MaskHead(
        num_classes=model_config.num_classes,
        upsample_factor=model_config.mask_head.upsample_factor,
        num_convs=model_config.mask_head.num_convs,
        num_filters=model_config.mask_head.num_filters,
        use_separable_conv=model_config.mask_head.use_separable_conv,
        activation=model_config.norm_activation.activation,
        norm_momentum=model_config.norm_activation.norm_momentum,
        norm_epsilon=model_config.norm_activation.norm_epsilon,
        kernel_regularizer=l2_regularizer,
        class_agnostic=model_config.mask_head.class_agnostic)

    mask_sampler_obj = mask_sampler.MaskSampler(
        mask_target_size=(
            model_config.mask_roi_aligner.crop_size *
            model_config.mask_head.upsample_factor),
        num_sampled_masks=model_config.mask_sampler.num_sampled_masks)

    mask_roi_aligner_obj = roi_aligner.MultilevelROIAligner(
        crop_size=model_config.mask_roi_aligner.crop_size,
        sample_offset=model_config.mask_roi_aligner.sample_offset)

  return maskrcnn_model.MaskRCNNModel(
      backbone=backbone,
      decoder=decoder,
      rpn_head=rpn_head,
      detection_head=detection_head,
      roi_generator=roi_generator_obj,
      roi_sampler=roi_samplers,
      roi_aligner=roi_aligner_obj,
      detection_generator=detection_generator_obj,
      mask_head=mask_head,
      mask_sampler=mask_sampler_obj,
      mask_roi_aligner=mask_roi_aligner_obj,
      class_agnostic_bbox_pred=det_head_cfg.class_agnostic_bbox_pred,
      cascade_class_ensemble=det_head_cfg.cascade_class_ensemble,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=model_config.anchor.num_scales,
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
def build_retinanet(
    input_specs: tf.keras.layers.InputSpec,
    model_config: retinanet_cfg.RetinaNet,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    backbone: Optional[tf.keras.Model] = None,
    decoder: Optional[tf.keras.Model] = None
) -> tf.keras.Model:
  """Builds RetinaNet model.

  Args:
    input_specs: Input specification; `input_specs.shape[1:]` is used to create
      the symbolic input that is passed through the backbone.
    model_config: RetinaNet model configuration.
    l2_regularizer: Optional regularizer handed to the backbone and decoder
      builders and used as kernel regularizer of the head.
    backbone: Optional pre-built backbone. When falsy, one is built from
      `model_config.backbone`.
    decoder: Optional pre-built decoder model. It is called on the backbone
      features, so it must be a callable model and not a regularizer. When
      falsy, one is requested from the decoder factory.

  Returns:
    A `retinanet_model.RetinaNetModel` instance.
  """
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)
  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  head_config = model_config.head
  generator_config = model_config.detection_generator
  num_anchors_per_location = (
      len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)

  head = dense_prediction_heads.RetinaNetHead(
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_classes=model_config.num_classes,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=head_config.num_convs,
      num_filters=head_config.num_filters,
      # `attribute_heads` may be None in the config; the head then receives an
      # empty list.
      attribute_heads=[
          cfg.as_dict() for cfg in (head_config.attribute_heads or [])
      ],
      use_separable_conv=head_config.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  # Builds decoder and head so that their trainable weights are initialized.
  if decoder:
    decoder_features = decoder(backbone_features)
    _ = head(decoder_features)

  detection_generator_obj = detection_generator.MultilevelDetectionGenerator(
      apply_nms=generator_config.apply_nms,
      pre_nms_top_k=generator_config.pre_nms_top_k,
      pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
      nms_iou_threshold=generator_config.nms_iou_threshold,
      max_num_detections=generator_config.max_num_detections,
      nms_version=generator_config.nms_version,
      use_cpu_nms=generator_config.use_cpu_nms,
      soft_nms_sigma=generator_config.soft_nms_sigma)

  model = retinanet_model.RetinaNetModel(
      backbone,
      decoder,
      head,
      detection_generator_obj,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=model_config.anchor.num_scales,
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
  return model
def build_segmentation_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: segmentation_cfg.SemanticSegmentationModel,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    backbone: Optional[tf.keras.Model] = None,
    decoder: Optional[tf.keras.Model] = None
) -> tf.keras.Model:
  """Builds Segmentation model.

  Args:
    input_specs: Input specification forwarded to the backbone builder.
    model_config: Semantic segmentation model configuration.
    l2_regularizer: Optional regularizer handed to the backbone and decoder
      builders and used as kernel regularizer of the heads.
    backbone: Optional pre-built backbone model. Its `output_specs` are read
      when the decoder has to be built. When falsy, one is built from
      `model_config.backbone`.
    decoder: Optional pre-built decoder model. When falsy, one is requested
      from the decoder factory.

  Returns:
    A `segmentation_model.SegmentationModel` instance.
  """
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  head_config = model_config.head

  head = segmentation_heads.SegmentationHead(
      num_classes=model_config.num_classes,
      level=head_config.level,
      num_convs=head_config.num_convs,
      prediction_kernel_size=head_config.prediction_kernel_size,
      num_filters=head_config.num_filters,
      use_depthwise_convolution=head_config.use_depthwise_convolution,
      upsample_factor=head_config.upsample_factor,
      feature_fusion=head_config.feature_fusion,
      low_level=head_config.low_level,
      low_level_num_filters=head_config.low_level_num_filters,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  # The mask scoring head is optional and only built when it is configured.
  mask_scoring_head = None
  if model_config.mask_scoring_head:
    mask_scoring_head = segmentation_heads.MaskScoring(
        num_classes=model_config.num_classes,
        **model_config.mask_scoring_head.as_dict(),
        activation=norm_activation_config.activation,
        use_sync_bn=norm_activation_config.use_sync_bn,
        norm_momentum=norm_activation_config.norm_momentum,
        norm_epsilon=norm_activation_config.norm_epsilon,
        kernel_regularizer=l2_regularizer)

  model = segmentation_model.SegmentationModel(
      backbone, decoder, head, mask_scoring_head=mask_scoring_head)
  return model
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory methods to build models."""
# Import libraries
import tensorflow as tf
from official.core import registry
from official.vision.beta.configs import video_classification as video_classification_cfg
from official.vision.beta.modeling import video_classification_model
from official.vision.beta.modeling import backbones
# Registry passed to `registry.register` by `register_model_builder` and to
# `registry.lookup` by `build_model`.
_REGISTERED_MODEL_CLS = {}
def register_model_builder(key: str):
  """Decorates a builder of model class.

  The builder should be a Callable (a class or a function).
  This decorator supports registration of model builder as follows:

  ```
  class MyModel(tf.keras.Model):
    pass

  @register_model_builder('mymodel')
  def builder(input_specs, config, num_classes, l2_reg):
    return MyModel(...)

  # Builds a MyModel object.
  my_model = build_model('mymodel', input_specs, config, num_classes, l2_reg)
  ```

  Args:
    key: the key to look up the builder.

  Returns:
    The callable returned by `registry.register` for `_REGISTERED_MODEL_CLS`
    and `key`, for use as a decorator on the builder.
  """
  decorator = registry.register(_REGISTERED_MODEL_CLS, key)
  return decorator
def build_model(
    model_type: str,
    input_specs: tf.keras.layers.InputSpec,
    model_config: video_classification_cfg.hyperparams.Config,
    num_classes: int,
    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
  """Builds a model with the builder registered under `model_type`.

  Args:
    model_type: string name of model type. It should be consistent with
      ModelConfig.model_type.
    input_specs: tf.keras.layers.InputSpec.
    model_config: a OneOfConfig. Model config.
    num_classes: number of classes.
    l2_regularizer: tf.keras.regularizers.Regularizer instance. Default to None.

  Returns:
    The value returned by the registered builder when called positionally with
    `input_specs`, `model_config`, `num_classes` and `l2_regularizer`.
  """
  return registry.lookup(_REGISTERED_MODEL_CLS, model_type)(
      input_specs, model_config, num_classes, l2_regularizer)
@register_model_builder('video_classification')
def build_video_classification_model(
    input_specs: tf.keras.layers.InputSpec,
    model_config: video_classification_cfg.VideoClassificationModel,
    num_classes: int,
    l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model:
  """Assembles the video classification model from its configuration.

  Args:
    input_specs: Input specification. It is passed to the backbone builder
      as-is and to the model wrapped in a dict under the key 'image'.
    model_config: Video classification model configuration.
    num_classes: Number of classes, forwarded to the model.
    l2_regularizer: Regularizer passed to the backbone builder and used as the
      model's kernel regularizer. Default to None.

  Returns:
    A `video_classification_model.VideoClassificationModel` instance.
  """
  specs_by_input_name = {'image': input_specs}
  norm_cfg = model_config.norm_activation

  video_backbone = backbones.factory.build_backbone(
      input_specs=input_specs,
      backbone_config=model_config.backbone,
      norm_activation_config=norm_cfg,
      l2_regularizer=l2_regularizer)

  return video_classification_model.VideoClassificationModel(
      backbone=video_backbone,
      num_classes=num_classes,
      input_specs=specs_by_input_name,
      dropout_rate=model_config.dropout_rate,
      aggregate_endpoints=model_config.aggregate_endpoints,
      kernel_regularizer=l2_regularizer,
      require_endpoints=model_config.require_endpoints)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for factory.py."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.configs import backbones
from official.vision.beta.configs import backbones_3d
from official.vision.beta.configs import image_classification as classification_cfg
from official.vision.beta.configs import maskrcnn as maskrcnn_cfg
from official.vision.beta.configs import retinanet as retinanet_cfg
from official.vision.beta.configs import video_classification as video_classification_cfg
from official.vision.beta.modeling import factory
from official.vision.beta.modeling import factory_3d
class ClassificationModelBuilderTest(parameterized.TestCase, tf.test.TestCase):
  """Checks that the classification model builder runs for several setups."""

  @parameterized.parameters(
      ('resnet', (224, 224), 5e-5),
      ('resnet', (224, 224), None),
      ('resnet', (None, None), 5e-5),
      ('resnet', (None, None), None),
  )
  def test_builder(self, backbone_type, input_size, weight_decay):
    """Builds the model with fixed/unknown sizes, with and without L2."""
    height, width = input_size
    specs = tf.keras.layers.InputSpec(shape=[None, height, width, 3])
    config = classification_cfg.ImageClassificationModel(
        num_classes=2,
        backbone=backbones.Backbone(type=backbone_type))

    # A falsy weight decay means no regularizer at all.
    if weight_decay:
      regularizer = tf.keras.regularizers.l2(weight_decay)
    else:
      regularizer = None

    _ = factory.build_classification_model(
        input_specs=specs,
        model_config=config,
        l2_regularizer=regularizer)
class MaskRCNNBuilderTest(parameterized.TestCase, tf.test.TestCase):
  """Checks that the Mask R-CNN builder runs without raising."""

  @parameterized.parameters(
      ('resnet', (640, 640)),
      ('resnet', (None, None)),
  )
  def test_builder(self, backbone_type, input_size):
    """Builds Mask R-CNN for a fixed and for an unknown spatial size."""
    height, width = input_size
    specs = tf.keras.layers.InputSpec(shape=[None, height, width, 3])
    config = maskrcnn_cfg.MaskRCNN(
        num_classes=2,
        backbone=backbones.Backbone(type=backbone_type))
    regularizer = tf.keras.regularizers.l2(5e-5)

    _ = factory.build_maskrcnn(
        input_specs=specs,
        model_config=config,
        l2_regularizer=regularizer)
class RetinaNetBuilderTest(parameterized.TestCase, tf.test.TestCase):
  """Checks the RetinaNet builder with and without attribute heads."""

  @parameterized.parameters(
      ('resnet', (640, 640), False),
      ('resnet', (None, None), True),
  )
  def test_builder(self, backbone_type, input_size, has_att_heads):
    """Builds RetinaNet and verifies the attribute head configs afterwards."""
    height, width = input_size
    specs = tf.keras.layers.InputSpec(shape=[None, height, width, 3])

    attribute_heads_config = None
    if has_att_heads:
      attribute_heads_config = [
          retinanet_cfg.AttributeHead(name='att1'),
          retinanet_cfg.AttributeHead(
              name='att2', type='classification', size=2),
      ]

    config = retinanet_cfg.RetinaNet(
        num_classes=2,
        backbone=backbones.Backbone(type=backbone_type),
        head=retinanet_cfg.RetinaNetHead(
            attribute_heads=attribute_heads_config))
    regularizer = tf.keras.regularizers.l2(5e-5)

    _ = factory.build_retinanet(
        input_specs=specs,
        model_config=config,
        l2_regularizer=regularizer)

    if has_att_heads:
      # The first head was created with only a name, so its `type` and `size`
      # are expected to hold the config defaults.
      expected_heads = (
          dict(name='att1', type='regression', size=1),
          dict(name='att2', type='classification', size=2),
      )
      for index, expected in enumerate(expected_heads):
        self.assertEqual(config.head.attribute_heads[index].as_dict(),
                         expected)
class VideoClassificationModelBuilderTest(parameterized.TestCase,
                                          tf.test.TestCase):
  """Checks that the video classification builder runs without raising."""

  @parameterized.parameters(
      ('resnet_3d', (8, 224, 224), 5e-5),
      ('resnet_3d', (None, None, None), 5e-5),
  )
  def test_builder(self, backbone_type, input_size, weight_decay):
    """Builds the model for a fixed and for an unknown input size."""
    dim0, dim1, dim2 = input_size
    specs = tf.keras.layers.InputSpec(shape=[None, dim0, dim1, dim2, 3])
    config = video_classification_cfg.VideoClassificationModel(
        backbone=backbones_3d.Backbone3D(type=backbone_type))

    # A falsy weight decay means no regularizer at all.
    if weight_decay:
      regularizer = tf.keras.regularizers.l2(weight_decay)
    else:
      regularizer = None

    _ = factory_3d.build_video_classification_model(
        input_specs=specs,
        model_config=config,
        num_classes=2,
        l2_regularizer=regularizer)
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Heads package definition."""
from official.vision.beta.modeling.heads.dense_prediction_heads import RetinaNetHead
from official.vision.beta.modeling.heads.dense_prediction_heads import RPNHead
from official.vision.beta.modeling.heads.instance_heads import DetectionHead
from official.vision.beta.modeling.heads.instance_heads import MaskHead
from official.vision.beta.modeling.heads.segmentation_heads import SegmentationHead
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of dense prediction heads."""
from typing import Any, Dict, List, Mapping, Optional, Union
# Import libraries
import numpy as np
import tensorflow as tf
from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Beta')
class RetinaNetHead(tf.keras.layers.Layer):
"""Creates a RetinaNet head."""
def __init__(
    self,
    min_level: int,
    max_level: int,
    num_classes: int,
    num_anchors_per_location: int,
    num_convs: int = 4,
    num_filters: int = 256,
    attribute_heads: Optional[List[Dict[str, Any]]] = None,
    use_separable_conv: bool = False,
    activation: str = 'relu',
    use_sync_bn: bool = False,
    norm_momentum: float = 0.99,
    norm_epsilon: float = 0.001,
    kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    num_params_per_anchor: int = 4,
    **kwargs):
  """Initializes a RetinaNet head.

  All arguments except `**kwargs` are recorded in `self._config_dict`.

  Args:
    min_level: An `int` number of minimum feature level.
    max_level: An `int` number of maximum feature level.
    num_classes: An `int` number of classes to predict.
    num_anchors_per_location: An `int` number of anchors per pixel location.
    num_convs: An `int` number that represents the number of the intermediate
      conv layers before the prediction.
    num_filters: An `int` number that represents the number of filters of the
      intermediate conv layers.
    attribute_heads: If not None, a list that contains a dict for each
      additional attribute head. Each dict consists of 3 key-value pairs:
      `name`, `type` ('regression' or 'classification'), and `size` (number
      of predicted values for each instance).
    use_separable_conv: A `bool` that indicates whether separable convolution
      layers are used.
    activation: A `str` that indicates which activation is used, e.g. 'relu',
      'swish', etc.
    use_sync_bn: A `bool` that indicates whether to use synchronized batch
      normalization across different replicas.
    norm_momentum: A `float` of normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default is None.
    bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
    num_params_per_anchor: Number of parameters required to specify an anchor
      box. For example, `num_params_per_anchor` would be 4 for axis-aligned
      anchor boxes specified by their y-centers, x-centers, heights, and
      widths.
    **kwargs: Additional keyword arguments passed to the parent constructor.
  """
  super(RetinaNetHead, self).__init__(**kwargs)

  self._config_dict = dict(
      min_level=min_level,
      max_level=max_level,
      num_classes=num_classes,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=num_convs,
      num_filters=num_filters,
      attribute_heads=attribute_heads,
      use_separable_conv=use_separable_conv,
      activation=activation,
      use_sync_bn=use_sync_bn,
      norm_momentum=norm_momentum,
      norm_epsilon=norm_epsilon,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
      num_params_per_anchor=num_params_per_anchor,
  )

  # Batch norm acts on the channel axis: last for 'channels_last', else axis 1.
  channels_last = tf.keras.backend.image_data_format() == 'channels_last'
  self._bn_axis = -1 if channels_last else 1
  self._activation = tf_utils.get_activation(activation)
def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
  """Instantiates every sub-layer of the RetinaNet head.

  Layers are created in this order: the class tower and its `scores`
  prediction conv, the box tower and its `boxes` prediction conv and, when
  `attribute_heads` is configured, one tower plus one prediction conv per
  attribute. Within a tower the conv layers are created only while visiting
  `min_level` and are then reused at every level, whereas a separate
  normalization layer is created for each (level, conv index) pair.

  Args:
    input_shape: Shape(s) of the layer inputs; passed through unchanged to the
      parent class `build`.

  Raises:
    ValueError: If an attribute head declares a `type` other than
      'regression' or 'classification'.
  """
  cfg = self._config_dict
  separable = cfg['use_separable_conv']
  first_level = cfg['min_level']
  levels = range(first_level, cfg['max_level'] + 1)

  if separable:
    conv_op = tf.keras.layers.SeparableConv2D
  else:
    conv_op = tf.keras.layers.Conv2D
  conv_kwargs = dict(
      filters=cfg['num_filters'],
      kernel_size=3,
      padding='same',
      bias_initializer=tf.zeros_initializer(),
      bias_regularizer=cfg['bias_regularizer'])
  if not separable:
    # Kernel settings are only passed to the regular (non-separable) conv op.
    conv_kwargs['kernel_initializer'] = tf.keras.initializers.RandomNormal(
        stddev=0.01)
    conv_kwargs['kernel_regularizer'] = cfg['kernel_regularizer']

  if cfg['use_sync_bn']:
    bn_op = tf.keras.layers.experimental.SyncBatchNormalization
  else:
    bn_op = tf.keras.layers.BatchNormalization
  bn_kwargs = dict(
      axis=self._bn_axis,
      momentum=cfg['norm_momentum'],
      epsilon=cfg['norm_epsilon'])

  def make_tower(conv_name, norm_name):
    """Creates the shared convs and the per-level norms of one tower.

    Args:
      conv_name: Callable mapping a conv index to the conv layer name.
      norm_name: Callable mapping (level, conv index) to the norm layer name.

    Returns:
      A tuple `(convs, norms)`: `convs` holds one conv per conv index and
      `norms` holds, for each level, a list with one norm per conv index.
    """
    shared_convs = []
    norms_per_level = []
    for lvl in levels:
      lvl_norms = []
      for idx in range(cfg['num_convs']):
        if lvl == first_level:
          # Convs are shared across levels, so build them only once.
          shared_convs.append(conv_op(name=conv_name(idx), **conv_kwargs))
        lvl_norms.append(bn_op(name=norm_name(lvl, idx), **bn_kwargs))
      norms_per_level.append(lvl_norms)
    return shared_convs, norms_per_level

  # Class net.
  self._cls_convs, self._cls_norms = make_tower(
      'classnet-conv_{}'.format, 'classnet-conv-norm_{}_{}'.format)

  classifier_kwargs = dict(
      filters=cfg['num_classes'] * cfg['num_anchors_per_location'],
      kernel_size=3,
      padding='same',
      # Constant bias whose sigmoid equals 0.01.
      bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
      bias_regularizer=cfg['bias_regularizer'])
  if not separable:
    classifier_kwargs['kernel_initializer'] = (
        tf.keras.initializers.RandomNormal(stddev=1e-5))
    classifier_kwargs['kernel_regularizer'] = cfg['kernel_regularizer']
  self._classifier = conv_op(name='scores', **classifier_kwargs)

  # Box net.
  self._box_convs, self._box_norms = make_tower(
      'boxnet-conv_{}'.format, 'boxnet-conv-norm_{}_{}'.format)

  box_regressor_kwargs = dict(
      filters=cfg['num_params_per_anchor'] * cfg['num_anchors_per_location'],
      kernel_size=3,
      padding='same',
      bias_initializer=tf.zeros_initializer(),
      bias_regularizer=cfg['bias_regularizer'])
  if not separable:
    box_regressor_kwargs['kernel_initializer'] = (
        tf.keras.initializers.RandomNormal(stddev=1e-5))
    box_regressor_kwargs['kernel_regularizer'] = cfg['kernel_regularizer']
  self._box_regressor = conv_op(name='boxes', **box_regressor_kwargs)

  # Attribute learning nets.
  if cfg['attribute_heads']:
    self._att_predictors = {}
    self._att_convs = {}
    self._att_norms = {}

    for head_cfg in cfg['attribute_heads']:
      att_name = head_cfg['name']
      att_type = head_cfg['type']
      att_size = head_cfg['size']

      # Build conv and norm layers. The naming lambdas are consumed by
      # `make_tower` right away, within the current loop iteration.
      att_convs, att_norms = make_tower(
          lambda idx: '{}-conv_{}'.format(att_name, idx),
          lambda lvl, idx: '{}-conv-norm_{}_{}'.format(att_name, lvl, idx))
      self._att_convs[att_name] = att_convs
      self._att_norms[att_name] = att_norms

      # Build the final prediction layer.
      att_predictor_kwargs = dict(
          filters=att_size * cfg['num_anchors_per_location'],
          kernel_size=3,
          padding='same',
          bias_initializer=tf.zeros_initializer(),
          bias_regularizer=cfg['bias_regularizer'])
      if att_type == 'regression':
        att_predictor_kwargs['bias_initializer'] = tf.zeros_initializer()
      elif att_type == 'classification':
        att_predictor_kwargs['bias_initializer'] = tf.constant_initializer(
            -np.log((1 - 0.01) / 0.01))
      else:
        raise ValueError(
            'Attribute head type {} not supported.'.format(att_type))
      if not separable:
        att_predictor_kwargs['kernel_initializer'] = (
            tf.keras.initializers.RandomNormal(stddev=1e-5))
        att_predictor_kwargs['kernel_regularizer'] = cfg['kernel_regularizer']

      self._att_predictors[att_name] = conv_op(
          name='{}_attributes'.format(att_name), **att_predictor_kwargs)

  super(RetinaNetHead, self).build(input_shape)
def call(self, features: Mapping[str, tf.Tensor]):
  """Runs the class, box and attribute towers on every feature level.

  Args:
    features: A `dict` of `tf.Tensor` where
      - key: A `str` of the level of the multilevel features.
      - values: A `tf.Tensor`, the feature map tensors, whose shape is
        [batch, height_l, width_l, channels].

  Returns:
    scores: A `dict` of `tf.Tensor` which includes scores of the predictions.
      - key: A `str` of the level of the multilevel predictions.
      - values: A `tf.Tensor` of the box scores predicted from a particular
        feature level, whose shape is
        [batch, height_l, width_l, num_classes * num_anchors_per_location].
    boxes: A `dict` of `tf.Tensor` which includes coordinates of the
      predictions.
      - key: A `str` of the level of the multilevel predictions.
      - values: A `tf.Tensor` of the box coordinates predicted from a
        particular feature level, whose shape is
        [batch, height_l, width_l,
         num_params_per_anchor * num_anchors_per_location].
    attributes: A `dict` of (attribute_name, attribute_prediction). Each
      `attribute_prediction` is a `dict` of:
      - key: A `str` of the level of the multilevel predictions.
      - values: A `tf.Tensor` of the attribute values predicted from a
        particular feature level, whose shape is
        [batch, height_l, width_l,
         attribute_size * num_anchors_per_location].
      It is an empty dictionary when no attribute heads are configured.
  """
  cfg = self._config_dict
  attribute_heads = cfg['attribute_heads'] or []

  def run_tower(inputs, convs, norms):
    """Applies conv -> norm -> activation for each (conv, norm) pair."""
    outputs = inputs
    for conv_layer, norm_layer in zip(convs, norms):
      outputs = self._activation(norm_layer(conv_layer(outputs)))
    return outputs

  scores = {}
  boxes = {}
  attributes = {head_cfg['name']: {} for head_cfg in attribute_heads}

  levels = range(cfg['min_level'], cfg['max_level'] + 1)
  for level_idx, level in enumerate(levels):
    key = str(level)
    level_features = features[key]

    # Class net: convs are shared, norms are specific to this level.
    cls_features = run_tower(
        level_features, self._cls_convs, self._cls_norms[level_idx])
    scores[key] = self._classifier(cls_features)

    # Box net.
    box_features = run_tower(
        level_features, self._box_convs, self._box_norms[level_idx])
    boxes[key] = self._box_regressor(box_features)

    # Attribute nets (no iterations when no attribute head is configured).
    for head_cfg in attribute_heads:
      att_name = head_cfg['name']
      att_features = run_tower(
          level_features,
          self._att_convs[att_name],
          self._att_norms[att_name][level_idx])
      attributes[att_name][key] = self._att_predictors[att_name](att_features)

  return scores, boxes, attributes
def get_config(self):
  """Returns the constructor arguments of this head as a `dict`.

  A shallow copy of the stored configuration is returned, so adding, removing
  or rebinding keys on the result does not alter the configuration held by
  the layer. Values are not copied: nested objects (e.g. the
  `attribute_heads` list) are still shared with the layer.

  Returns:
    A new `dict` mapping constructor argument names to their values.
  """
  return dict(self._config_dict)
@classmethod
def from_config(cls, config):
  """Creates an instance by passing `config` entries as keyword arguments.

  Args:
    config: A `dict` of constructor keyword arguments, e.g. the value
      returned by `get_config`.

  Returns:
    A new instance of `cls`.
  """
  instance = cls(**config)
  return instance
@tf.keras.utils.register_keras_serializable(package='Beta')
class RPNHead(tf.keras.layers.Layer):
  """Creates a Region Proposal Network (RPN) head.

  For every feature level the head applies a stack of conv -> norm ->
  activation blocks followed by two prediction layers with kernel size 1: one
  for the anchor scores and one for the anchor box coordinates. The conv
  layers are shared by all levels, while every level owns its normalization
  layers.
  """

  def __init__(
      self,
      min_level: int,
      max_level: int,
      num_anchors_per_location: int,
      num_convs: int = 1,
      num_filters: int = 256,
      use_separable_conv: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a Region Proposal Network head.

    Args:
      min_level: An `int` number of minimum feature level.
      max_level: An `int` number of maximum feature level.
      num_anchors_per_location: An `int` number of anchors per pixel
        location.
      num_convs: An `int` number that represents the number of the
        intermediate convolution layers before the prediction.
      num_filters: An `int` number that represents the number of filters of
        the intermediate convolution layers.
      use_separable_conv: A `bool` that indicates whether separable
        convolution layers are used.
      activation: A `str` that indicates which activation is used, e.g.
        'relu', 'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None. It is only applied when
        `use_separable_conv` is False.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for the
        convolution biases.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(RPNHead, self).__init__(**kwargs)
    self._config_dict = {
        'min_level': min_level,
        'max_level': max_level,
        'num_anchors_per_location': num_anchors_per_location,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_separable_conv': use_separable_conv,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }

    # Normalize over the channel axis of the active image data format.
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape):
    """Creates the sub-layers of the head.

    Args:
      input_shape: Shape(s) of the layer inputs; passed through unchanged to
        the parent class `build`.
    """
    conv_op = (tf.keras.layers.SeparableConv2D
               if self._config_dict['use_separable_conv']
               else tf.keras.layers.Conv2D)
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      # Kernel settings are only passed to the regular (non-separable) op.
      conv_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=0.01),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    self._convs = []
    self._norms = []
    for level in range(
        self._config_dict['min_level'], self._config_dict['max_level'] + 1):
      this_level_norms = []
      for i in range(self._config_dict['num_convs']):
        if level == self._config_dict['min_level']:
          # Convs are shared across levels, so build them only once.
          conv_name = 'rpn-conv_{}'.format(i)
          self._convs.append(conv_op(name=conv_name, **conv_kwargs))
        # Norm layers are created per (level, conv index) pair.
        norm_name = 'rpn-conv-norm_{}_{}'.format(level, i)
        this_level_norms.append(bn_op(name=norm_name, **bn_kwargs))
      self._norms.append(this_level_norms)

    classifier_kwargs = {
        'filters': self._config_dict['num_anchors_per_location'],
        'kernel_size': 1,
        'padding': 'valid',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      classifier_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._classifier = conv_op(name='rpn-scores', **classifier_kwargs)

    box_regressor_kwargs = {
        'filters': 4 * self._config_dict['num_anchors_per_location'],
        'kernel_size': 1,
        'padding': 'valid',
        'bias_initializer': tf.zeros_initializer(),
        'bias_regularizer': self._config_dict['bias_regularizer'],
    }
    if not self._config_dict['use_separable_conv']:
      box_regressor_kwargs.update({
          'kernel_initializer': tf.keras.initializers.RandomNormal(
              stddev=1e-5),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
      })
    self._box_regressor = conv_op(name='rpn-boxes', **box_regressor_kwargs)

    super(RPNHead, self).build(input_shape)

  def call(self, features: Mapping[str, tf.Tensor]):
    """Forward pass of the RPN head.

    Args:
      features: A `dict` of `tf.Tensor` where
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor`, the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].

    Returns:
      scores: A `dict` of `tf.Tensor` which includes scores of the
        predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box scores predicted from a particular
          feature level, whose shape is
          [batch, height_l, width_l, num_anchors_per_location].
      boxes: A `dict` of `tf.Tensor` which includes coordinates of the
        predictions.
        - key: A `str` of the level of the multilevel predictions.
        - values: A `tf.Tensor` of the box coordinates predicted from a
          particular feature level, whose shape is
          [batch, height_l, width_l, 4 * num_anchors_per_location].
    """
    scores = {}
    boxes = {}
    for i, level in enumerate(
        range(self._config_dict['min_level'],
              self._config_dict['max_level'] + 1)):
      x = features[str(level)]
      # Shared convs paired with the norm layers owned by this level.
      for conv, norm in zip(self._convs, self._norms[i]):
        x = conv(x)
        x = norm(x)
        x = self._activation(x)
      scores[str(level)] = self._classifier(x)
      boxes[str(level)] = self._box_regressor(x)
    return scores, boxes

  def get_config(self):
    """Returns the constructor arguments of this head as a `dict`.

    A shallow copy is returned, so adding, removing or rebinding keys on the
    result does not alter the configuration held by the layer.
    """
    return dict(self._config_dict)

  @classmethod
  def from_config(cls, config):
    """Creates an instance by passing `config` entries as keyword arguments."""
    return cls(**config)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for dense_prediction_heads.py."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.modeling.heads import dense_prediction_heads
class RetinaNetHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `dense_prediction_heads.RetinaNetHead`."""

  @parameterized.parameters(
      (False, False, False),
      (False, True, False),
      (True, False, True),
      (True, True, True),
  )
  def test_forward(self, use_separable_conv, use_sync_bn, has_att_heads):
    """Checks the per-level output shapes of a forward pass."""
    attribute_heads = (
        [dict(name='depth', type='regression', size=1)]
        if has_att_heads else None)

    head = dense_prediction_heads.RetinaNetHead(
        min_level=3,
        max_level=4,
        num_classes=3,
        num_anchors_per_location=3,
        num_convs=2,
        num_filters=256,
        attribute_heads=attribute_heads,
        use_separable_conv=use_separable_conv,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
    }
    scores, boxes, attributes = head(features)

    # Expected [batch, height, width] of the outputs at each level; the
    # channel count is appended per output type below.
    leading_dims = {'3': [2, 128, 128], '4': [2, 64, 64]}
    for level, dims in leading_dims.items():
      # 3 classes * 3 anchors per location.
      self.assertAllEqual(scores[level].numpy().shape, dims + [9])
    for level, dims in leading_dims.items():
      # 4 box parameters (the default) * 3 anchors per location.
      self.assertAllEqual(boxes[level].numpy().shape, dims + [12])
    if has_att_heads:
      for att in attributes.values():
        for level, dims in leading_dims.items():
          # Attribute size 1 * 3 anchors per location.
          self.assertAllEqual(att[level].numpy().shape, dims + [3])

  def test_serialize_deserialize(self):
    """Checks that `from_config(get_config())` reproduces the config."""
    original_head = dense_prediction_heads.RetinaNetHead(
        min_level=3,
        max_level=7,
        num_classes=3,
        num_anchors_per_location=9,
        num_convs=2,
        num_filters=16,
        attribute_heads=None,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    restored_head = dense_prediction_heads.RetinaNetHead.from_config(
        original_head.get_config())
    self.assertAllEqual(
        original_head.get_config(), restored_head.get_config())
class RpnHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Unit tests for `dense_prediction_heads.RPNHead`."""

  @parameterized.parameters(
      (False, False),
      (False, True),
      (True, False),
      (True, True),
  )
  def test_forward(self, use_separable_conv, use_sync_bn):
    """Checks the per-level output shapes of a forward pass."""
    head = dense_prediction_heads.RPNHead(
        min_level=3,
        max_level=4,
        num_anchors_per_location=3,
        num_convs=2,
        num_filters=256,
        use_separable_conv=use_separable_conv,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
    }
    scores, boxes = head(features)

    # Expected [batch, height, width] of the outputs at each level; the
    # channel count is appended per output type below.
    leading_dims = {'3': [2, 128, 128], '4': [2, 64, 64]}
    for level, dims in leading_dims.items():
      # One score per anchor: 3 anchors per location.
      self.assertAllEqual(scores[level].numpy().shape, dims + [3])
    for level, dims in leading_dims.items():
      # 4 box coordinates * 3 anchors per location.
      self.assertAllEqual(boxes[level].numpy().shape, dims + [12])

  def test_serialize_deserialize(self):
    """Checks that `from_config(get_config())` reproduces the config."""
    original_head = dense_prediction_heads.RPNHead(
        min_level=3,
        max_level=7,
        num_anchors_per_location=9,
        num_convs=2,
        num_filters=16,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    restored_head = dense_prediction_heads.RPNHead.from_config(
        original_head.get_config())
    self.assertAllEqual(
        original_head.get_config(), restored_head.get_config())
# Run the tests defined in this file when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of instance prediction heads."""
from typing import List, Union, Optional
# Import libraries
import tensorflow as tf
from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Beta')
class DetectionHead(tf.keras.layers.Layer):
  """Creates a detection head.

  The head applies an optional stack of conv -> norm -> activation blocks to
  each ROI feature map, flattens the result, applies a stack of
  FC -> norm -> activation blocks, and finally predicts class scores and box
  outputs with two separate dense layers.
  """

  def __init__(
      self,
      num_classes: int,
      num_convs: int = 0,
      num_filters: int = 256,
      use_separable_conv: bool = False,
      num_fcs: int = 2,
      fc_dims: int = 1024,
      class_agnostic_bbox_pred: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a detection head.

    Args:
      num_classes: An `int` for the number of classes.
      num_convs: An `int` number that represents the number of the
        intermediate convolution layers before the FC layers.
      num_filters: An `int` number that represents the number of filters of
        the intermediate convolution layers.
      use_separable_conv: A `bool` that indicates whether separable
        convolution layers are used.
      num_fcs: An `int` number that represents the number of FC layers before
        the predictions.
      fc_dims: An `int` number that represents the number of dimensions of
        the FC layers.
      class_agnostic_bbox_pred: A `bool`. If True, a single set of 4 box
        outputs is predicted per ROI; otherwise 4 box outputs are predicted
        for every class.
      activation: A `str` that indicates which activation is used, e.g.
        'relu', 'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object
        applied to the convolution and dense kernels. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object applied
        to the convolution and dense biases.
      **kwargs: Additional keyword arguments to be passed.
    """
    super(DetectionHead, self).__init__(**kwargs)
    self._config_dict = {
        'num_classes': num_classes,
        'num_convs': num_convs,
        'num_filters': num_filters,
        'use_separable_conv': use_separable_conv,
        'num_fcs': num_fcs,
        'fc_dims': fc_dims,
        'class_agnostic_bbox_pred': class_agnostic_bbox_pred,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }

    # Normalize over the channel axis of the active image data format.
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._bn_axis = -1
    else:
      self._bn_axis = 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the sub-layers of the head.

    Args:
      input_shape: Shape(s) of the layer inputs; passed through unchanged to
        the parent class `build`.
    """
    conv_op = (tf.keras.layers.SeparableConv2D
               if self._config_dict['use_separable_conv']
               else tf.keras.layers.Conv2D)
    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
    }
    if self._config_dict['use_separable_conv']:
      # The separable op takes separate depthwise/pointwise kernel settings;
      # both receive the same initializer type and regularizer.
      conv_kwargs.update({
          'depthwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'pointwise_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'depthwise_regularizer': self._config_dict['kernel_regularizer'],
          'pointwise_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      })
    else:
      conv_kwargs.update({
          'kernel_initializer': tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          'bias_initializer': tf.zeros_initializer(),
          'kernel_regularizer': self._config_dict['kernel_regularizer'],
          'bias_regularizer': self._config_dict['bias_regularizer'],
      })
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    self._convs = []
    self._conv_norms = []
    for i in range(self._config_dict['num_convs']):
      conv_name = 'detection-conv_{}'.format(i)
      self._convs.append(conv_op(name=conv_name, **conv_kwargs))
      bn_name = 'detection-conv-bn_{}'.format(i)
      self._conv_norms.append(bn_op(name=bn_name, **bn_kwargs))

    self._fcs = []
    self._fc_norms = []
    for i in range(self._config_dict['num_fcs']):
      fc_name = 'detection-fc_{}'.format(i)
      self._fcs.append(
          tf.keras.layers.Dense(
              units=self._config_dict['fc_dims'],
              kernel_initializer=tf.keras.initializers.VarianceScaling(
                  scale=1 / 3.0, mode='fan_out', distribution='uniform'),
              kernel_regularizer=self._config_dict['kernel_regularizer'],
              bias_regularizer=self._config_dict['bias_regularizer'],
              name=fc_name))
      bn_name = 'detection-fc-bn_{}'.format(i)
      self._fc_norms.append(bn_op(name=bn_name, **bn_kwargs))

    self._classifier = tf.keras.layers.Dense(
        units=self._config_dict['num_classes'],
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'],
        name='detection-scores')

    # One 4-value box per ROI when class agnostic, else one per class.
    num_box_outputs = (4 if self._config_dict['class_agnostic_bbox_pred'] else
                       self._config_dict['num_classes'] * 4)
    self._box_regressor = tf.keras.layers.Dense(
        units=num_box_outputs,
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'],
        name='detection-boxes')

    super(DetectionHead, self).build(input_shape)

  def call(self, inputs: tf.Tensor, training: Optional[bool] = None):
    """Forward pass of box and class branches for the Mask-RCNN model.

    Args:
      inputs: A `tf.Tensor` of the shape [batch_size, num_instances,
        roi_height, roi_width, roi_channels], representing the ROI features.
        All dimensions except the first are read from the static shape.
      training: A `bool` indicating whether it is in `training` mode. It is
        not referenced in the body of this method.

    Returns:
      class_outputs: A `tf.Tensor` of the shape
        [batch_size, num_rois, num_classes], representing the class
        predictions.
      box_outputs: A `tf.Tensor` representing the box predictions, of the
        shape [batch_size, num_rois, num_classes * 4], or
        [batch_size, num_rois, 4] when `class_agnostic_bbox_pred` is True.
    """
    roi_features = inputs
    _, num_rois, height, width, filters = roi_features.get_shape().as_list()

    # Fold the ROI dimension into the batch dimension for the 2D convs.
    x = tf.reshape(roi_features, [-1, height, width, filters])
    for conv, bn in zip(self._convs, self._conv_norms):
      x = conv(x)
      x = bn(x)
      x = self._activation(x)

    # Re-read the channel count (the convs may have changed it), then
    # flatten every ROI feature map into a vector for the FC layers.
    _, _, _, filters = x.get_shape().as_list()
    x = tf.reshape(x, [-1, num_rois, height * width * filters])

    for fc, bn in zip(self._fcs, self._fc_norms):
      x = fc(x)
      x = bn(x)
      x = self._activation(x)

    classes = self._classifier(x)
    boxes = self._box_regressor(x)
    return classes, boxes

  def get_config(self):
    """Returns the constructor arguments of this head as a `dict`.

    A shallow copy is returned, so adding, removing or rebinding keys on the
    result does not alter the configuration held by the layer.
    """
    return dict(self._config_dict)

  @classmethod
  def from_config(cls, config):
    """Creates an instance by passing `config` entries as keyword arguments."""
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Beta')
class MaskHead(tf.keras.layers.Layer):
"""Creates a mask head."""
def __init__(
    self,
    num_classes: int,
    upsample_factor: int = 2,
    num_convs: int = 4,
    num_filters: int = 256,
    use_separable_conv: bool = False,
    activation: str = 'relu',
    use_sync_bn: bool = False,
    norm_momentum: float = 0.99,
    norm_epsilon: float = 0.001,
    kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    class_agnostic: bool = False,
    **kwargs):
  """Stores the mask head configuration.

  Args:
    num_classes: An `int` of the number of classes.
    upsample_factor: An `int` that indicates the upsample factor to generate
      the final predicted masks. It should be >= 1.
    num_convs: An `int` number that represents the number of the intermediate
      convolution layers before the mask prediction layers.
    num_filters: An `int` number that represents the number of filters of the
      intermediate convolution layers.
    use_separable_conv: A `bool` that indicates whether separable convolution
      layers are used.
    activation: A `str` that indicates which activation is used, e.g. 'relu',
      'swish', etc.
    use_sync_bn: A `bool` that indicates whether to use synchronized batch
      normalization across different replicas.
    norm_momentum: A `float` of normalization momentum for the moving average.
    norm_epsilon: A `float` added to variance to avoid dividing by zero.
    kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
      Conv2D. Default is None.
    bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
    class_agnostic: A `bool`. If set, we use a single channel mask head that
      is shared between all classes.
    **kwargs: Additional keyword arguments to be passed.
  """
  super(MaskHead, self).__init__(**kwargs)

  # Every constructor argument is recorded under its own name.
  self._config_dict = dict(
      num_classes=num_classes,
      upsample_factor=upsample_factor,
      num_convs=num_convs,
      num_filters=num_filters,
      use_separable_conv=use_separable_conv,
      activation=activation,
      use_sync_bn=use_sync_bn,
      norm_momentum=norm_momentum,
      norm_epsilon=norm_epsilon,
      kernel_regularizer=kernel_regularizer,
      bias_regularizer=bias_regularizer,
      class_agnostic=class_agnostic)

  # Normalize over the channel axis of the active image data format.
  channels_last = tf.keras.backend.image_data_format() == 'channels_last'
  self._bn_axis = -1 if channels_last else 1
  self._activation = tf_utils.get_activation(activation)
def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
  """Instantiates every sub-layer of the mask head.

  Creates, in order: `num_convs` conv/norm pairs, a transposed convolution
  whose kernel size and stride both equal `upsample_factor` together with its
  norm layer, and the final `mask-logits` prediction layer.

  Args:
    input_shape: Shape(s) of the layer inputs; passed through unchanged to the
      parent class `build`.
  """
  cfg = self._config_dict
  separable = cfg['use_separable_conv']

  if separable:
    conv_op = tf.keras.layers.SeparableConv2D
  else:
    conv_op = tf.keras.layers.Conv2D

  def fan_out_normal_init():
    """Returns a fresh initializer for the conv kernels of this head."""
    return tf.keras.initializers.VarianceScaling(
        scale=2, mode='fan_out', distribution='untruncated_normal')

  def conv_settings(filters, kernel_size, padding):
    """Assembles the keyword arguments passed to `conv_op`."""
    settings = dict(filters=filters, kernel_size=kernel_size, padding=padding)
    if separable:
      # The separable op takes separate depthwise/pointwise kernel settings.
      settings.update(
          depthwise_initializer=fan_out_normal_init(),
          pointwise_initializer=fan_out_normal_init(),
          bias_initializer=tf.zeros_initializer(),
          depthwise_regularizer=cfg['kernel_regularizer'],
          pointwise_regularizer=cfg['kernel_regularizer'],
          bias_regularizer=cfg['bias_regularizer'])
    else:
      settings.update(
          kernel_initializer=fan_out_normal_init(),
          bias_initializer=tf.zeros_initializer(),
          kernel_regularizer=cfg['kernel_regularizer'],
          bias_regularizer=cfg['bias_regularizer'])
    return settings

  tower_kwargs = conv_settings(cfg['num_filters'], 3, 'same')

  if cfg['use_sync_bn']:
    bn_op = tf.keras.layers.experimental.SyncBatchNormalization
  else:
    bn_op = tf.keras.layers.BatchNormalization
  bn_kwargs = dict(
      axis=self._bn_axis,
      momentum=cfg['norm_momentum'],
      epsilon=cfg['norm_epsilon'])

  self._convs = []
  self._conv_norms = []
  for idx in range(cfg['num_convs']):
    self._convs.append(
        conv_op(name='mask-conv_{}'.format(idx), **tower_kwargs))
    self._conv_norms.append(
        bn_op(name='mask-conv-bn_{}'.format(idx), **bn_kwargs))

  # Kernel size equal to stride: the spatial size is multiplied by
  # `upsample_factor`.
  upsample_factor = cfg['upsample_factor']
  self._deconv = tf.keras.layers.Conv2DTranspose(
      filters=cfg['num_filters'],
      kernel_size=upsample_factor,
      strides=upsample_factor,
      padding='valid',
      kernel_initializer=fan_out_normal_init(),
      bias_initializer=tf.zeros_initializer(),
      kernel_regularizer=cfg['kernel_regularizer'],
      bias_regularizer=cfg['bias_regularizer'],
      name='mask-upsampling')
  self._deconv_bn = bn_op(name='mask-deconv-bn', **bn_kwargs)

  # A single shared mask channel when class agnostic, else one per class.
  num_mask_channels = 1 if cfg['class_agnostic'] else cfg['num_classes']
  self._mask_regressor = conv_op(
      name='mask-logits', **conv_settings(num_mask_channels, 1, 'valid'))

  super(MaskHead, self).build(input_shape)
def call(self, inputs: List[tf.Tensor], training: bool = None):
  """Runs the Mask R-CNN mask branch and returns one mask per ROI.

  Args:
    inputs: A `list` of two tensors, unpacked as (roi_features, roi_classes):
      roi_features: A `tf.Tensor` of shape [batch_size, num_instances,
        roi_height, roi_width, roi_channels], representing the ROI features.
      roi_classes: A `tf.Tensor` of shape [batch_size, num_instances],
        representing the classes of the ROIs.
    training: A `bool` indicating whether it is in `training` mode. It is not
      read explicitly inside this method.

  Returns:
    mask_outputs: A `tf.Tensor` of shape
      [batch_size, num_instances, roi_height * upsample_factor,
       roi_width * upsample_factor], representing the mask predictions.
  """
  roi_features, roi_classes = inputs
  static_shape = roi_features.get_shape().as_list()
  batch_size, num_rois, height, width, filters = static_shape
  # Use the dynamic batch dimension when the static one is unknown.
  if batch_size is None:
    batch_size = tf.shape(roi_features)[0]

  # Merge the batch and ROI axes so the 2D layers see a 4-D tensor.
  net = tf.reshape(roi_features, [-1, height, width, filters])
  for conv_layer, norm_layer in zip(self._convs, self._conv_norms):
    net = self._activation(norm_layer(conv_layer(net)))
  net = self._activation(self._deconv_bn(self._deconv(net)))
  logits = self._mask_regressor(net)

  upsample_factor = self._config_dict['upsample_factor']
  class_agnostic = self._config_dict['class_agnostic']
  # A class-agnostic head predicts a single mask channel per ROI.
  mask_channels = 1 if class_agnostic else self._config_dict['num_classes']
  logits = tf.reshape(
      logits,
      [-1, num_rois, height * upsample_factor, width * upsample_factor,
       mask_channels])

  # Index grids of shape [batch_size, num_rois] addressing every ROI.
  batch_indices = tf.tile(
      tf.expand_dims(tf.range(batch_size), axis=1), [1, num_rois])
  mask_indices = tf.tile(
      tf.expand_dims(tf.range(num_rois), axis=0), [batch_size, 1])

  if class_agnostic:
    # Only channel 0 exists, so every ROI gathers from it.
    class_gather_indices = tf.zeros_like(roi_classes, dtype=tf.int32)
  else:
    class_gather_indices = tf.cast(roi_classes, dtype=tf.int32)

  # Move the channel axis ahead of the spatial axes so that a
  # (batch, roi, channel) index selects one full [height, width] mask.
  channel_first_logits = tf.transpose(logits, [0, 1, 4, 2, 3])
  gather_indices = tf.stack(
      [batch_indices, mask_indices, class_gather_indices], axis=2)
  return tf.gather_nd(channel_first_logits, gather_indices)
def get_config(self):
  """Returns the dictionary of constructor arguments stored on the head."""
  config = self._config_dict
  return config
@classmethod
def from_config(cls, config):
  """Builds a new instance by passing `config` entries as keyword arguments."""
  instance = cls(**config)
  return instance
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for instance_heads.py."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.modeling.heads import instance_heads
class DetectionHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Checks output shapes and config round-tripping of `DetectionHead`."""

  @parameterized.parameters(
      (0, 0, False, False),
      (0, 1, False, False),
      (1, 0, False, False),
      (1, 1, False, False),
  )
  def test_forward(self, num_convs, num_fcs, use_separable_conv, use_sync_bn):
    """Feeds random ROI features and verifies score and box output shapes."""
    head_kwargs = dict(
        num_classes=3,
        num_convs=num_convs,
        num_filters=16,
        use_separable_conv=use_separable_conv,
        num_fcs=num_fcs,
        fc_dims=4,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    detection_head = instance_heads.DetectionHead(**head_kwargs)

    # [batch, num_rois, roi_height, roi_width, channels]
    roi_features = np.random.rand(2, 10, 128, 128, 16)
    scores, boxes = detection_head(roi_features)

    expected_shapes = (
        (scores, [2, 10, 3]),
        (boxes, [2, 10, 12]),
    )
    for output, expected_shape in expected_shapes:
      self.assertAllEqual(output.numpy().shape, expected_shape)

  def test_serialize_deserialize(self):
    """A head rebuilt from its own config must report an equal config."""
    original_head = instance_heads.DetectionHead(
        num_classes=91,
        num_convs=0,
        num_filters=256,
        use_separable_conv=False,
        num_fcs=2,
        fc_dims=1024,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    restored_head = instance_heads.DetectionHead.from_config(
        original_head.get_config())
    self.assertAllEqual(
        original_head.get_config(), restored_head.get_config())
class MaskHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Checks output shapes and config round-tripping of `MaskHead`."""

  @parameterized.parameters(
      (1, 1, False),
      (1, 2, False),
      (2, 1, False),
      (2, 2, False),
  )
  def test_forward(self, upsample_factor, num_convs, use_sync_bn):
    """Verifies the mask size scales with `upsample_factor`."""
    head_kwargs = dict(
        num_classes=3,
        upsample_factor=upsample_factor,
        num_convs=num_convs,
        num_filters=16,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=use_sync_bn,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    mask_head = instance_heads.MaskHead(**head_kwargs)

    # [batch, num_rois, roi_height, roi_width, channels] and [batch, num_rois].
    roi_features = np.random.rand(2, 10, 14, 14, 16)
    roi_classes = np.zeros((2, 10))
    masks = mask_head([roi_features, roi_classes])

    expected_size = 14 * upsample_factor
    self.assertAllEqual(
        masks.numpy().shape, [2, 10, expected_size, expected_size])

  def test_serialize_deserialize(self):
    """A head rebuilt from its own config must report an equal config."""
    original_head = instance_heads.MaskHead(
        num_classes=3,
        upsample_factor=2,
        num_convs=1,
        num_filters=256,
        use_separable_conv=False,
        activation='relu',
        use_sync_bn=False,
        norm_momentum=0.99,
        norm_epsilon=0.001,
        kernel_regularizer=None,
        bias_regularizer=None,
    )
    restored_head = instance_heads.MaskHead.from_config(
        original_head.get_config())
    self.assertAllEqual(
        original_head.get_config(), restored_head.get_config())

  def test_forward_class_agnostic(self):
    """Runs a class-agnostic head with otherwise default arguments."""
    mask_head = instance_heads.MaskHead(num_classes=3, class_agnostic=True)
    roi_features = np.random.rand(2, 10, 14, 14, 16)
    roi_classes = np.zeros((2, 10))
    masks = mask_head([roi_features, roi_classes])
    self.assertAllEqual(masks.numpy().shape, [2, 10, 28, 28])
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of segmentation heads."""
from typing import List, Union, Optional, Mapping, Tuple, Any
import tensorflow as tf
from official.modeling import tf_utils
from official.vision.beta.modeling.layers import nn_layers
from official.vision.beta.ops import spatial_transform_ops
class MaskScoring(tf.keras.Model):
  """Creates a mask scoring layer.

  This implements the mask scoring layer from the paper:
  Zhaojin Huang, Lichao Huang, Yongchao Gong, Chang Huang, Xinggang Wang.
  Mask Scoring R-CNN.
  (https://arxiv.org/pdf/1903.00241.pdf)
  """

  def __init__(
      self,
      num_classes: int,
      fc_input_size: List[int],
      num_convs: int = 3,
      num_filters: int = 256,
      fc_dims: int = 1024,
      num_fcs: int = 2,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes mask scoring layer.

    Args:
      num_classes: An `int` for number of classes.
      fc_input_size: A List of `int` giving the spatial size the features are
        resized to before the fully connected layers.
      num_convs: An `int` for number of conv layers.
      num_filters: An `int` for the number of filters for conv layers.
      fc_dims: An `int` number of units for each fully connected layer.
      num_fcs: An `int` for number of fully connected layers.
      activation: A `str` name of the activation function.
      use_sync_bn: A bool, whether or not to use sync batch normalization.
      norm_momentum: A float for the momentum in BatchNorm. Defaults to 0.99.
      norm_epsilon: A float for the epsilon value in BatchNorm. Defaults to
        0.001.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object applied
        to the conv and dense kernels. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object applied
        to the conv and dense biases. Default is None.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._config_dict = dict(
        num_classes=num_classes,
        num_convs=num_convs,
        num_filters=num_filters,
        fc_input_size=fc_input_size,
        fc_dims=fc_dims,
        num_fcs=num_fcs,
        use_sync_bn=use_sync_bn,
        norm_momentum=norm_momentum,
        norm_epsilon=norm_epsilon,
        activation=activation,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
    )

    # Batch norm normalizes over the channel axis of the data format in use.
    channels_last = tf.keras.backend.image_data_format() == 'channels_last'
    self._bn_axis = -1 if channels_last else 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the mask scoring head."""
    kernel_regularizer = self._config_dict['kernel_regularizer']
    bias_regularizer = self._config_dict['bias_regularizer']

    conv_kwargs = {
        'filters': self._config_dict['num_filters'],
        'kernel_size': 3,
        'padding': 'same',
        'kernel_initializer': tf.keras.initializers.VarianceScaling(
            scale=2, mode='fan_out', distribution='untruncated_normal'),
        'bias_initializer': tf.zeros_initializer(),
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
    }

    if self._config_dict['use_sync_bn']:
      norm_cls = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm_cls = tf.keras.layers.BatchNormalization
    norm_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    # Convolution tower: one (conv, batch norm) pair per layer.
    self._convs = []
    self._conv_norms = []
    for index in range(self._config_dict['num_convs']):
      self._convs.append(
          tf.keras.layers.Conv2D(
              name=f'mask-scoring_{index}', **conv_kwargs))
      self._conv_norms.append(
          norm_cls(name=f'mask-scoring-bn_{index}', **norm_kwargs))

    # Fully connected tower: one (dense, batch norm) pair per layer.
    self._fcs = []
    self._fc_norms = []
    for index in range(self._config_dict['num_fcs']):
      dense_layer = tf.keras.layers.Dense(
          units=self._config_dict['fc_dims'],
          kernel_initializer=tf.keras.initializers.VarianceScaling(
              scale=1 / 3.0, mode='fan_out', distribution='uniform'),
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=bias_regularizer,
          name=f'mask-scoring-fc_{index}')
      self._fcs.append(dense_layer)
      self._fc_norms.append(
          norm_cls(name=f'mask-scoring-fc-bn_{index}', **norm_kwargs))

    # Final layer producing one score per class.
    self._classifier = tf.keras.layers.Dense(
        units=self._config_dict['num_classes'],
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        name='iou-scores')

    super().build(input_shape)

  def call(self, inputs: tf.Tensor, training: bool = None):
    """Forward pass of the mask scoring head.

    Args:
      inputs: A `tf.Tensor` of the shape
        [batch_size, height, width, num_classes], representing the
        segmentation logits.
      training: a `bool` indicating whether it is in `training` mode.

    Returns:
      mask_scores: A `tf.Tensor` of predicted mask scores
        [batch_size, num_classes].
    """
    # Gradients are blocked from flowing back into the provided logits.
    net = tf.stop_gradient(inputs)

    for conv_layer, norm_layer in zip(self._convs, self._conv_norms):
      net = self._activation(norm_layer(conv_layer(net)))

    # Casts feat to float32 so the resize op can be run on TPU.
    net = tf.cast(net, tf.float32)
    net = tf.image.resize(
        net,
        size=self._config_dict['fc_input_size'],
        method=tf.image.ResizeMethod.BILINEAR)
    # Casts it back to be compatible with the rest of the operations.
    net = tf.cast(net, inputs.dtype)

    # Flatten the spatial and channel axes for the dense layers.
    _, height, width, channels = net.get_shape().as_list()
    net = tf.reshape(net, [-1, height * width * channels])

    for dense_layer, norm_layer in zip(self._fcs, self._fc_norms):
      net = self._activation(norm_layer(dense_layer(net)))

    return self._classifier(net)

  def get_config(self) -> Mapping[str, Any]:
    """Returns the dictionary of constructor arguments."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds an instance from `config`; `custom_objects` is not used."""
    return cls(**config)
@tf.keras.utils.register_keras_serializable(package='Beta')
class SegmentationHead(tf.keras.layers.Layer):
  """Creates a segmentation head."""

  def __init__(
      self,
      num_classes: int,
      level: Union[int, str],
      num_convs: int = 2,
      num_filters: int = 256,
      use_depthwise_convolution: bool = False,
      prediction_kernel_size: int = 1,
      upsample_factor: int = 1,
      feature_fusion: Optional[str] = None,
      decoder_min_level: Optional[int] = None,
      decoder_max_level: Optional[int] = None,
      low_level: int = 2,
      low_level_num_filters: int = 48,
      num_decoder_filters: int = 256,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      **kwargs):
    """Initializes a segmentation head.

    Args:
      num_classes: An `int` number of mask classification categories. The
        number of classes does not include background class.
      level: An `int` or `str`, level to use to build segmentation head.
      num_convs: An `int` number of stacked convolution before the last
        prediction layer.
      num_filters: An `int` number to specify the number of filters used.
        Default is 256.
      use_depthwise_convolution: A bool to specify if use depthwise separable
        convolutions.
      prediction_kernel_size: An `int` number to specify the kernel size of
        the prediction layer.
      upsample_factor: An `int` number to specify the upsampling factor to
        generate finer mask. Default 1 means no upsampling is applied.
      feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`,
        `panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from
        decoder_features[level] will be fused with low level feature maps
        from backbone. If `pyramid_fusion`, multiscale features will be
        resized and fused at the target level.
      decoder_min_level: An `int` of minimum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      decoder_max_level: An `int` of maximum level from decoder to use in
        feature fusion. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      low_level: An `int` of backbone level to be used for feature fusion. It
        is used when feature_fusion is set to `deeplabv3plus`.
      low_level_num_filters: An `int` of reduced number of filters for the low
        level features before fusing it with higher level features. It is
        only used when feature_fusion is set to `deeplabv3plus`.
      num_decoder_filters: An `int` of number of filters in the decoder
        outputs. It is only used when feature_fusion is set to
        `panoptic_fpn_fusion`.
      activation: A `str` that indicates which activation is used, e.g.
        'relu', 'swish', etc.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      **kwargs: Additional keyword arguments to be passed.
    """
    super().__init__(**kwargs)

    self._config_dict = dict(
        num_classes=num_classes,
        level=level,
        num_convs=num_convs,
        num_filters=num_filters,
        use_depthwise_convolution=use_depthwise_convolution,
        prediction_kernel_size=prediction_kernel_size,
        upsample_factor=upsample_factor,
        feature_fusion=feature_fusion,
        decoder_min_level=decoder_min_level,
        decoder_max_level=decoder_max_level,
        low_level=low_level,
        low_level_num_filters=low_level_num_filters,
        num_decoder_filters=num_decoder_filters,
        activation=activation,
        use_sync_bn=use_sync_bn,
        norm_momentum=norm_momentum,
        norm_epsilon=norm_epsilon,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
    )

    # The channel axis doubles as the concat axis used for feature fusion.
    channels_last = tf.keras.backend.image_data_format() == 'channels_last'
    self._bn_axis = -1 if channels_last else 1
    self._activation = tf_utils.get_activation(activation)

  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the segmentation head."""
    depthwise = self._config_dict['use_depthwise_convolution']
    kernel_regularizer = self._config_dict['kernel_regularizer']
    random_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)

    conv_cls = tf.keras.layers.Conv2D
    # With depthwise convolutions the 3x3 spatial filtering is done by the
    # DepthwiseConv2D layer, so the regular conv that follows is 1x1.
    conv_kwargs = {
        'kernel_size': 1 if depthwise else 3,
        'padding': 'same',
        'use_bias': False,
        'kernel_initializer': random_initializer,
        'kernel_regularizer': kernel_regularizer,
    }

    if self._config_dict['use_sync_bn']:
      norm_cls = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      norm_cls = tf.keras.layers.BatchNormalization
    norm_kwargs = {
        'axis': self._bn_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    fusion = self._config_dict['feature_fusion']
    if fusion == 'deeplabv3plus':
      # Deeplabv3+ feature fusion layers.
      self._dlv3p_conv = conv_cls(
          kernel_size=1,
          padding='same',
          use_bias=False,
          kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
          kernel_regularizer=kernel_regularizer,
          name='segmentation_head_deeplabv3p_fusion_conv',
          filters=self._config_dict['low_level_num_filters'])
      self._dlv3p_norm = norm_cls(
          name='segmentation_head_deeplabv3p_fusion_norm', **norm_kwargs)
    elif fusion == 'panoptic_fpn_fusion':
      self._panoptic_fpn_fusion = nn_layers.PanopticFPNFusion(
          min_level=self._config_dict['decoder_min_level'],
          max_level=self._config_dict['decoder_max_level'],
          target_level=self._config_dict['level'],
          num_filters=self._config_dict['num_filters'],
          num_fpn_filters=self._config_dict['num_decoder_filters'],
          activation=self._config_dict['activation'],
          kernel_regularizer=kernel_regularizer,
          bias_regularizer=self._config_dict['bias_regularizer'])

    # Segmentation head layers. `_convs` and `_norms` stay index-aligned so
    # that `call` can walk them in lockstep.
    self._convs = []
    self._norms = []
    for index in range(self._config_dict['num_convs']):
      if depthwise:
        depthwise_layer = tf.keras.layers.DepthwiseConv2D(
            name=f'segmentation_head_depthwise_conv_{index}',
            kernel_size=3,
            padding='same',
            use_bias=False,
            depthwise_initializer=random_initializer,
            depthwise_regularizer=kernel_regularizer,
            depth_multiplier=1)
        self._convs.append(depthwise_layer)
        self._norms.append(
            norm_cls(
                name=f'segmentation_head_depthwise_norm_{index}',
                **norm_kwargs))
      self._convs.append(
          conv_cls(
              name=f'segmentation_head_conv_{index}',
              filters=self._config_dict['num_filters'],
              **conv_kwargs))
      self._norms.append(
          norm_cls(name=f'segmentation_head_norm_{index}', **norm_kwargs))

    # Prediction layer producing `num_classes` output channels.
    self._classifier = conv_cls(
        name='segmentation_output',
        filters=self._config_dict['num_classes'],
        kernel_size=self._config_dict['prediction_kernel_size'],
        padding='same',
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=self._config_dict['bias_regularizer'])

    super().build(input_shape)

  def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
                               Union[tf.Tensor, Mapping[str, tf.Tensor]]]):
    """Forward pass of the segmentation head.

    It supports both a tuple of 2 tensors or 2 dictionaries. The first is
    backbone endpoints, and the second is decoder endpoints. When inputs are
    tensors, they are from a single level of feature maps. When inputs are
    dictionaries, they contain multiple levels of feature maps, where the key
    is the index of feature map.

    Args:
      inputs: A tuple of 2 feature map tensors of shape
        [batch, height_l, width_l, channels] or 2 dictionaries of tensors:
        - key: A `str` of the level of the multilevel features.
        - values: A `tf.Tensor` of the feature map tensors, whose shape is
          [batch, height_l, width_l, channels].
        The first is backbone endpoints, and the second is decoder endpoints.

    Returns:
      segmentation prediction mask: A `tf.Tensor` of the segmentation mask
        scores predicted from input features.

    Raises:
      ValueError: If `feature_fusion` is `pyramid_fusion` and the decoder
        endpoints are not given as a dictionary.
    """
    backbone_output, decoder_output = inputs[0], inputs[1]
    fusion = self._config_dict['feature_fusion']

    if fusion == 'deeplabv3plus':
      # deeplabv3+ feature fusion: pick the decoder feature at `level` and the
      # backbone feature at `low_level` when dictionaries are given.
      if isinstance(decoder_output, dict):
        x = decoder_output[str(self._config_dict['level'])]
      else:
        x = decoder_output
      if isinstance(backbone_output, dict):
        y = backbone_output[str(self._config_dict['low_level'])]
      else:
        y = backbone_output
      y = self._activation(self._dlv3p_norm(self._dlv3p_conv(y)))

      # Resize the decoder feature to the low level feature's spatial size
      # before concatenating along the channel axis.
      x = tf.image.resize(
          x, tf.shape(y)[1:3], method=tf.image.ResizeMethod.BILINEAR)
      x = tf.cast(x, dtype=y.dtype)
      x = tf.concat([x, y], axis=self._bn_axis)
    elif fusion == 'pyramid_fusion':
      if not isinstance(decoder_output, dict):
        raise ValueError('Only support dictionary decoder_output.')
      x = nn_layers.pyramid_feature_fusion(
          decoder_output, self._config_dict['level'])
    elif fusion == 'panoptic_fpn_fusion':
      x = self._panoptic_fpn_fusion(decoder_output)
    elif isinstance(decoder_output, dict):
      x = decoder_output[str(self._config_dict['level'])]
    else:
      x = decoder_output

    for conv_layer, norm_layer in zip(self._convs, self._norms):
      x = self._activation(norm_layer(conv_layer(x)))

    upsample_factor = self._config_dict['upsample_factor']
    if upsample_factor > 1:
      x = spatial_transform_ops.nearest_upsampling(x, scale=upsample_factor)

    return self._classifier(x)

  def get_config(self):
    """Returns the base layer config merged with this head's arguments."""
    return {**super().get_config(), **self._config_dict}

  @classmethod
  def from_config(cls, config):
    """Builds an instance by passing `config` entries as keyword arguments."""
    return cls(**config)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for segmentation_heads.py."""
# Import libraries
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.modeling.heads import segmentation_heads
class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for `segmentation_heads.SegmentationHead`."""

  @parameterized.parameters(
      (2, 'pyramid_fusion', None, None),
      (3, 'pyramid_fusion', None, None),
      (2, 'panoptic_fpn_fusion', 2, 5),
      (2, 'panoptic_fpn_fusion', 2, 6),
      (3, 'panoptic_fpn_fusion', 3, 5),
      (3, 'panoptic_fpn_fusion', 3, 6))
  def test_forward(self, level, feature_fusion,
                   decoder_min_level, decoder_max_level):
    """Runs the head on multilevel features and checks the logits shape.

    Args:
      level: An `int` target level passed to the head.
      feature_fusion: A `str` naming the feature fusion mode under test.
      decoder_min_level: An `int` or None, forwarded to the head.
      decoder_max_level: An `int` or None, forwarded to the head.
    """
    backbone_features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
        '5': np.random.rand(2, 32, 32, 16),
    }
    decoder_features = {
        '3': np.random.rand(2, 128, 128, 64),
        '4': np.random.rand(2, 64, 64, 64),
        '5': np.random.rand(2, 32, 32, 64),
        '6': np.random.rand(2, 16, 16, 64),
    }

    # The panoptic FPN cases start at level 2, so provide that level as well.
    if feature_fusion == 'panoptic_fpn_fusion':
      backbone_features['2'] = np.random.rand(2, 256, 256, 16)
      decoder_features['2'] = np.random.rand(2, 256, 256, 64)

    head = segmentation_heads.SegmentationHead(
        num_classes=10,
        level=level,
        feature_fusion=feature_fusion,
        decoder_min_level=decoder_min_level,
        decoder_max_level=decoder_max_level,
        num_decoder_filters=64)

    logits = head((backbone_features, decoder_features))

    # The feature dictionaries are keyed by the level as a string. Testing the
    # integer `level` against them never matches, which would silently skip
    # the shape check, so the lookup uses the string form.
    level_key = str(level)
    if level_key in decoder_features:
      target_feature = decoder_features[level_key]
      self.assertAllEqual(logits.numpy().shape, [
          2, target_feature.shape[1], target_feature.shape[2], 10
      ])

  def test_serialize_deserialize(self):
    """A head rebuilt from its own config must report an equal config."""
    head = segmentation_heads.SegmentationHead(num_classes=10, level=3)
    config = head.get_config()
    new_head = segmentation_heads.SegmentationHead.from_config(config)
    self.assertAllEqual(head.get_config(), new_head.get_config())
class MaskScoringHeadTest(parameterized.TestCase, tf.test.TestCase):
  """Checks output shape and config round-tripping of `MaskScoring`."""

  @parameterized.parameters(
      (1, 1, 64, [4, 4]),
      (2, 1, 64, [4, 4]),
      (3, 1, 64, [4, 4]),
      (1, 2, 32, [8, 8]),
      (2, 2, 32, [8, 8]),
      (3, 2, 32, [8, 8]),
  )
  def test_forward(self, num_convs, num_fcs,
                   num_filters, fc_input_size):
    """Feeds random features and expects scores of shape [batch, classes]."""
    head_kwargs = dict(
        num_classes=2,
        num_convs=num_convs,
        num_filters=num_filters,
        fc_dims=128,
        fc_input_size=fc_input_size,
    )
    head = segmentation_heads.MaskScoring(**head_kwargs)

    features = np.random.rand(2, 64, 64, 16)
    scores = head(features)

    self.assertAllEqual(scores.numpy().shape, [2, 2])

  def test_serialize_deserialize(self):
    """A head rebuilt from its own config must report an equal config."""
    original_head = segmentation_heads.MaskScoring(
        num_classes=2, fc_input_size=[4, 4], fc_dims=128)
    restored_head = segmentation_heads.MaskScoring.from_config(
        original_head.get_config())
    self.assertAllEqual(
        original_head.get_config(), restored_head.get_config())
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Layers package definition."""
from official.vision.beta.modeling.layers.box_sampler import BoxSampler
from official.vision.beta.modeling.layers.detection_generator import DetectionGenerator
from official.vision.beta.modeling.layers.detection_generator import MultilevelDetectionGenerator
from official.vision.beta.modeling.layers.mask_sampler import MaskSampler
from official.vision.beta.modeling.layers.nn_blocks import BottleneckBlock
from official.vision.beta.modeling.layers.nn_blocks import BottleneckResidualInner
from official.vision.beta.modeling.layers.nn_blocks import DepthwiseSeparableConvBlock
from official.vision.beta.modeling.layers.nn_blocks import InvertedBottleneckBlock
from official.vision.beta.modeling.layers.nn_blocks import ResidualBlock
from official.vision.beta.modeling.layers.nn_blocks import ResidualInner
from official.vision.beta.modeling.layers.nn_blocks import ReversibleLayer
from official.vision.beta.modeling.layers.nn_blocks_3d import BottleneckBlock3D
from official.vision.beta.modeling.layers.nn_blocks_3d import SelfGating
from official.vision.beta.modeling.layers.nn_layers import CausalConvMixin
from official.vision.beta.modeling.layers.nn_layers import Conv2D
from official.vision.beta.modeling.layers.nn_layers import Conv3D
from official.vision.beta.modeling.layers.nn_layers import DepthwiseConv2D
from official.vision.beta.modeling.layers.nn_layers import GlobalAveragePool3D
from official.vision.beta.modeling.layers.nn_layers import PositionalEncoding
from official.vision.beta.modeling.layers.nn_layers import Scale
from official.vision.beta.modeling.layers.nn_layers import SpatialAveragePool3D
from official.vision.beta.modeling.layers.nn_layers import SqueezeExcitation
from official.vision.beta.modeling.layers.nn_layers import StochasticDepth
from official.vision.beta.modeling.layers.nn_layers import TemporalSoftmaxPool
from official.vision.beta.modeling.layers.roi_aligner import MultilevelROIAligner
from official.vision.beta.modeling.layers.roi_generator import MultilevelROIGenerator
from official.vision.beta.modeling.layers.roi_sampler import ROISampler
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of box sampler."""
# Import libraries
import tensorflow as tf
from official.vision.beta.ops import sampling_ops
@tf.keras.utils.register_keras_serializable(package='Beta')
class BoxSampler(tf.keras.layers.Layer):
  """Creates a BoxSampler to sample positive and negative boxes."""

  def __init__(self,
               num_samples: int = 512,
               foreground_fraction: float = 0.25,
               **kwargs):
    """Initializes a box sampler.

    Args:
      num_samples: An `int` of the number of sampled boxes per image.
      foreground_fraction: A `float` in [0, 1], what percentage of boxes
        should be sampled from the positive examples.
      **kwargs: Additional keyword arguments passed to Layer.
    """
    self._config_dict = dict(
        num_samples=num_samples,
        foreground_fraction=foreground_fraction,
    )
    super().__init__(**kwargs)

  def call(self, positive_matches: tf.Tensor, negative_matches: tf.Tensor,
           ignored_matches: tf.Tensor):
    """Samples and selects positive and negative instances.

    Args:
      positive_matches: A `bool` tensor of shape of [batch, N] where N is the
        number of instances. For each element, `True` means the instance
        corresponds to a positive example.
      negative_matches: A `bool` tensor of shape of [batch, N] where N is the
        number of instances. For each element, `True` means the instance
        corresponds to a negative example.
      ignored_matches: A `bool` tensor of shape of [batch, N] where N is the
        number of instances. For each element, `True` means the instance
        should be ignored.

    Returns:
      A `tf.tensor` of shape of [batch_size, K], storing the indices of the
      sampled examples, where K is `num_samples`.
    """
    num_samples = self._config_dict['num_samples']

    # An instance is a candidate when it is matched as positive or negative
    # and is not flagged as ignored.
    matched = tf.logical_or(positive_matches, negative_matches)
    not_ignored = tf.logical_not(ignored_matches)
    sample_candidates = tf.logical_and(matched, not_ignored)

    sampler = sampling_ops.BalancedPositiveNegativeSampler(
        positive_fraction=self._config_dict['foreground_fraction'],
        is_static=True)

    # The batch dimension is read from the static shape and each image is
    # subsampled independently.
    batch_size = sample_candidates.shape[0]
    per_image_indicators = [
        sampler.subsample(
            sample_candidates[image_index],
            num_samples,
            positive_matches[image_index])
        for image_index in range(batch_size)
    ]
    sampled_indicators = tf.stack(per_image_indicators)

    # Only the indices returned by top_k are kept; the values are discarded.
    top_k_result = tf.nn.top_k(
        tf.cast(sampled_indicators, dtype=tf.int32),
        k=num_samples,
        sorted=True)
    return top_k_result[1]

  def get_config(self):
    """Returns the dictionary of constructor arguments."""
    return self._config_dict

  @classmethod
  def from_config(cls, config):
    """Builds an instance by passing `config` entries as keyword arguments."""
    return cls(**config)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Layers for DeepLabV3."""
import tensorflow as tf
class SpatialPyramidPooling(tf.keras.layers.Layer):
  """Implements the Atrous Spatial Pyramid Pooling.

  The layer runs several parallel branches over the same 4-D input (a 1x1
  convolution, one dilated convolution per entry of `dilation_rates`, and an
  average-pooling branch), concatenates the branch outputs along the last
  axis and projects the result with a final 1x1 convolution.

  References:
    [Rethinking Atrous Convolution for Semantic Image Segmentation](
      https://arxiv.org/pdf/1706.05587.pdf)
    [Encoder-Decoder with Atrous Separable Convolution for Semantic Image
      Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
  """

  def __init__(
      self,
      output_channels,
      dilation_rates,
      pool_kernel_size=None,
      use_sync_bn=False,
      batchnorm_momentum=0.99,
      batchnorm_epsilon=0.001,
      activation='relu',
      dropout=0.5,
      kernel_initializer='glorot_uniform',
      kernel_regularizer=None,
      interpolation='bilinear',
      use_depthwise_convolution=False,
      **kwargs):
    """Initializes `SpatialPyramidPooling`.

    Args:
      output_channels: Number of channels produced by SpatialPyramidPooling.
      dilation_rates: A list of integers for parallel dilated conv.
      pool_kernel_size: A list of integers or None. If None, global average
        pooling is applied, otherwise an average pooling of pool_kernel_size
        is applied.
      use_sync_bn: A bool, whether or not to use sync batch normalization.
      batchnorm_momentum: A float for the momentum in BatchNorm. Defaults to
        0.99.
      batchnorm_epsilon: A float for the epsilon value in BatchNorm. Defaults to
        0.001.
      activation: A `str` for type of activation to be used. Defaults to 'relu'.
      dropout: A float for the dropout rate before output. Defaults to 0.5.
      kernel_initializer: Kernel initializer for conv layers. Defaults to
        `glorot_uniform`.
      kernel_regularizer: Kernel regularizer for conv layers. Defaults to None.
      interpolation: The interpolation method for upsampling. Defaults to
        `bilinear`.
      use_depthwise_convolution: Allows spatial pooling to be separable
        depthwise convolutions. [Encoder-Decoder with Atrous Separable
        Convolution for Semantic Image Segmentation](
        https://arxiv.org/pdf/1802.02611.pdf)
      **kwargs: Other keyword arguments for the layer.
    """
    super(SpatialPyramidPooling, self).__init__(**kwargs)

    self.output_channels = output_channels
    self.dilation_rates = dilation_rates
    self.use_sync_bn = use_sync_bn
    self.batchnorm_momentum = batchnorm_momentum
    self.batchnorm_epsilon = batchnorm_epsilon
    self.activation = activation
    self.dropout = dropout
    # Resolve string identifiers / configs into initializer and regularizer
    # objects once, so that every sub-layer receives the same object.
    self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self.interpolation = interpolation
    # Only rank-4 inputs are accepted.
    self.input_spec = tf.keras.layers.InputSpec(ndim=4)
    self.pool_kernel_size = pool_kernel_size
    self.use_depthwise_convolution = use_depthwise_convolution

  def build(self, input_shape):
    """Creates the parallel ASPP branches and the output projection.

    Args:
      input_shape: Shape of the 4-D input. Entries 1, 2 and 3 are read as
        height, width and channels respectively.
    """
    height = input_shape[1]
    width = input_shape[2]
    channels = input_shape[3]

    self.aspp_layers = []

    if self.use_sync_bn:
      bn_op = tf.keras.layers.experimental.SyncBatchNormalization
    else:
      bn_op = tf.keras.layers.BatchNormalization

    if tf.keras.backend.image_data_format() == 'channels_last':
      bn_axis = -1
    else:
      bn_axis = 1

    # Branch 1: plain 1x1 convolution.
    conv_sequential = tf.keras.Sequential([
        tf.keras.layers.Conv2D(
            filters=self.output_channels, kernel_size=(1, 1),
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            use_bias=False),
        bn_op(
            axis=bn_axis,
            momentum=self.batchnorm_momentum,
            epsilon=self.batchnorm_epsilon),
        tf.keras.layers.Activation(self.activation)
    ])
    self.aspp_layers.append(conv_sequential)

    # One dilated branch per requested dilation rate.
    for dilation_rate in self.dilation_rates:
      leading_layers = []
      kernel_size = (3, 3)
      if self.use_depthwise_convolution:
        # Separable variant: a 3x3 dilated depthwise convolution goes first
        # and the regular convolution below shrinks to a 1x1 kernel.
        leading_layers += [
            tf.keras.layers.DepthwiseConv2D(
                depth_multiplier=1, kernel_size=kernel_size,
                padding='same', depthwise_regularizer=self.kernel_regularizer,
                depthwise_initializer=self.kernel_initializer,
                dilation_rate=dilation_rate, use_bias=False)
        ]
        kernel_size = (1, 1)
      conv_sequential = tf.keras.Sequential(leading_layers + [
          tf.keras.layers.Conv2D(
              filters=self.output_channels, kernel_size=kernel_size,
              padding='same', kernel_regularizer=self.kernel_regularizer,
              kernel_initializer=self.kernel_initializer,
              dilation_rate=dilation_rate, use_bias=False),
          bn_op(axis=bn_axis, momentum=self.batchnorm_momentum,
                epsilon=self.batchnorm_epsilon),
          tf.keras.layers.Activation(self.activation)])
      self.aspp_layers.append(conv_sequential)

    # Pooling branch: global average pooling (reshaped back to a 1x1 map) or
    # a fixed-size average pooling, depending on `pool_kernel_size`.
    if self.pool_kernel_size is None:
      pool_sequential = tf.keras.Sequential([
          tf.keras.layers.GlobalAveragePooling2D(),
          tf.keras.layers.Reshape((1, 1, channels))])
    else:
      pool_sequential = tf.keras.Sequential([
          tf.keras.layers.AveragePooling2D(self.pool_kernel_size)])

    # The pooled features are convolved and then resized to the input's
    # (height, width) so they can be concatenated with the other branches.
    pool_sequential.add(
        tf.keras.Sequential([
            tf.keras.layers.Conv2D(
                filters=self.output_channels,
                kernel_size=(1, 1),
                kernel_initializer=self.kernel_initializer,
                kernel_regularizer=self.kernel_regularizer,
                use_bias=False),
            bn_op(
                axis=bn_axis,
                momentum=self.batchnorm_momentum,
                epsilon=self.batchnorm_epsilon),
            tf.keras.layers.Activation(self.activation),
            tf.keras.layers.experimental.preprocessing.Resizing(
                height,
                width,
                interpolation=self.interpolation,
                dtype=tf.float32)
        ]))

    self.aspp_layers.append(pool_sequential)

    # Final 1x1 projection applied to the concatenated branch outputs.
    self.projection = tf.keras.Sequential([
        tf.keras.layers.Conv2D(
            filters=self.output_channels, kernel_size=(1, 1),
            kernel_initializer=self.kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            use_bias=False),
        bn_op(
            axis=bn_axis,
            momentum=self.batchnorm_momentum,
            epsilon=self.batchnorm_epsilon),
        tf.keras.layers.Activation(self.activation),
        tf.keras.layers.Dropout(rate=self.dropout)])

  def call(self, inputs, training=None):
    """Applies all ASPP branches and projects their concatenation.

    Args:
      inputs: The 4-D input tensor.
      training: Optional training flag forwarded to the sub-layers. When
        None, the Keras backend learning phase is used.

    Returns:
      The output of the projection applied to the branch outputs
      concatenated along the last axis.
    """
    if training is None:
      training = tf.keras.backend.learning_phase()
    # Every branch output is cast back to the input dtype before the concat.
    # NOTE(review): presumably needed because the Resizing layer is created
    # with dtype=tf.float32 -- confirm for mixed-precision setups.
    result = [
        tf.cast(layer(inputs, training=training), inputs.dtype)
        for layer in self.aspp_layers
    ]
    result = tf.concat(result, axis=-1)
    result = self.projection(result, training=training)
    return result

  def get_config(self):
    """Returns the layer configuration, including all constructor arguments.

    Returns:
      A `dict` combining the base layer config with this layer's constructor
      arguments, suitable for `from_config`.
    """
    config = {
        'output_channels': self.output_channels,
        'dilation_rates': self.dilation_rates,
        'pool_kernel_size': self.pool_kernel_size,
        'use_sync_bn': self.use_sync_bn,
        'batchnorm_momentum': self.batchnorm_momentum,
        'batchnorm_epsilon': self.batchnorm_epsilon,
        'activation': self.activation,
        'dropout': self.dropout,
        'kernel_initializer': tf.keras.initializers.serialize(
            self.kernel_initializer),
        'kernel_regularizer': tf.keras.regularizers.serialize(
            self.kernel_regularizer),
        'interpolation': self.interpolation,
        # Without this entry a config round trip would silently fall back to
        # the non-separable default.
        'use_depthwise_convolution': self.use_depthwise_convolution,
    }
    base_config = super(SpatialPyramidPooling, self).get_config()
    return {**base_config, **config}
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for ASPP."""
import tensorflow as tf
from tensorflow.python.keras import keras_parameterized
from official.vision.beta.modeling.layers import deeplab
@keras_parameterized.run_all_keras_modes
class DeeplabTest(keras_parameterized.TestCase):
  """Tests for `deeplab.SpatialPyramidPooling`."""

  @keras_parameterized.parameterized.parameters(
      (None,),
      ([32, 32],),
  )
  def test_aspp(self, pool_kernel_size):
    """Checks the output shape for both pooling variants."""
    inputs = tf.keras.Input(shape=(64, 64, 128), dtype=tf.float32)
    # Forward the parameterized value so that both the global-pooling (None)
    # and the fixed-kernel average-pooling branches are exercised.
    layer = deeplab.SpatialPyramidPooling(output_channels=256,
                                          dilation_rates=[6, 12, 18],
                                          pool_kernel_size=pool_kernel_size)
    output = layer(inputs)
    self.assertAllEqual([None, 64, 64, 256], output.shape)

  def test_aspp_invalid_shape(self):
    """Checks that a non 4-D input is rejected with a ValueError."""
    inputs = tf.keras.Input(shape=(64, 64), dtype=tf.float32)
    layer = deeplab.SpatialPyramidPooling(output_channels=256,
                                          dilation_rates=[6, 12, 18])
    with self.assertRaises(ValueError):
      _ = layer(inputs)

  def test_config_with_custom_name(self):
    """Checks that a custom layer name survives a config round trip."""
    layer = deeplab.SpatialPyramidPooling(256, [5], name='aspp')
    config = layer.get_config()
    layer_1 = deeplab.SpatialPyramidPooling.from_config(config)
    self.assertEqual(layer_1.name, layer.name)
# Run the TensorFlow test runner only when this file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment