Commit 78c43ef1 authored by Gunho Park
Browse files

Merge branch 'master' of https://github.com/tensorflow/models

parents 67cfc95b e3c7e300
......@@ -393,8 +393,10 @@ class SpineNet(tf.keras.Model):
block_spec.level))
if (block_spec.level < self._min_level or
block_spec.level > self._max_level):
raise ValueError('Output level is out of range [{}, {}]'.format(
self._min_level, self._max_level))
logging.warning(
'SpineNet output level out of range [min_level, max_level] = '
'[%s, %s] will not be used for further processing.',
self._min_level, self._max_level)
endpoints[str(block_spec.level)] = x
return endpoints
......
......@@ -152,6 +152,7 @@ class SpineNetMobile(tf.keras.Model):
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
use_keras_upsampling_2d: bool = False,
**kwargs):
"""Initializes a Mobile SpineNet model.
......@@ -181,6 +182,7 @@ class SpineNetMobile(tf.keras.Model):
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A small `float` added to variance to avoid dividing by zero.
use_keras_upsampling_2d: If True, use keras UpSampling2D layer.
**kwargs: Additional keyword arguments to be passed.
"""
self._input_specs = input_specs
......@@ -200,12 +202,7 @@ class SpineNetMobile(tf.keras.Model):
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if activation == 'relu':
self._activation_fn = tf.nn.relu
elif activation == 'swish':
self._activation_fn = tf.nn.swish
else:
raise ValueError('Activation {} not implemented.'.format(activation))
self._use_keras_upsampling_2d = use_keras_upsampling_2d
self._num_init_blocks = 2
if use_sync_bn:
......@@ -271,7 +268,7 @@ class SpineNetMobile(tf.keras.Model):
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
inputs)
return tf.identity(x, name=name)
return tf.keras.layers.Activation('linear', name=name)(x)
def _build_stem(self, inputs):
"""Builds SpineNet stem."""
......@@ -290,7 +287,7 @@ class SpineNetMobile(tf.keras.Model):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation_fn)(x)
x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
net = []
stem_strides = [1, 2]
......@@ -365,14 +362,15 @@ class SpineNetMobile(tf.keras.Model):
parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = tf.add_n(parent_weights)
weights_sum = layers.Add()(parent_weights)
parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents))
]
# Fuse all parent nodes then build a new block.
x = tf_utils.get_activation(self._activation_fn)(tf.add_n(parents))
x = tf_utils.get_activation(
self._activation, use_keras_layer=True)(layers.Add()(parents))
x = self._block_group(
inputs=x,
in_filters=target_num_filters,
......@@ -421,7 +419,7 @@ class SpineNetMobile(tf.keras.Model):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation_fn)(x)
x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
endpoints[str(level)] = x
return endpoints
......@@ -446,11 +444,13 @@ class SpineNetMobile(tf.keras.Model):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation_fn)(x)
x = tf_utils.get_activation(
self._activation, use_keras_layer=True)(x)
input_width /= 2
elif input_width < target_width:
scale = target_width // input_width
x = spatial_transform_ops.nearest_upsampling(x, scale=scale)
x = spatial_transform_ops.nearest_upsampling(
x, scale=scale, use_keras_layer=self._use_keras_upsampling_2d)
# Last 1x1 conv to match filter size.
x = layers.Conv2D(
......@@ -485,7 +485,8 @@ class SpineNetMobile(tf.keras.Model):
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
'norm_epsilon': self._norm_epsilon,
'use_keras_upsampling_2d': self._use_keras_upsampling_2d,
}
return config_dict
......@@ -531,4 +532,5 @@ def build_spinenet_mobile(
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon)
norm_epsilon=norm_activation_config.norm_epsilon,
use_keras_upsampling_2d=backbone_cfg.use_keras_upsampling_2d)
......@@ -90,6 +90,7 @@ class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase):
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
use_keras_upsampling_2d=False,
)
network = spinenet_mobile.SpineNetMobile(**kwargs)
......
......@@ -24,17 +24,16 @@ from official.vision.beta.modeling.backbones import spinenet
class SpineNetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(128, 0.65, 1, 0.5, 128),
(256, 1.0, 1, 0.5, 256),
(384, 1.0, 2, 0.5, 256),
(512, 1.0, 3, 1.0, 256),
(640, 1.3, 4, 1.0, 384),
(128, 0.65, 1, 0.5, 128, 4, 6),
(256, 1.0, 1, 0.5, 256, 3, 6),
(384, 1.0, 2, 0.5, 256, 4, 7),
(512, 1.0, 3, 1.0, 256, 3, 7),
(640, 1.3, 4, 1.0, 384, 3, 7),
)
def test_network_creation(self, input_size, filter_size_scale, block_repeats,
resample_alpha, endpoints_num_filters):
resample_alpha, endpoints_num_filters, min_level,
max_level):
"""Test creation of SpineNet models."""
min_level = 3
max_level = 7
tf.keras.backend.set_image_data_format('channels_last')
......
......@@ -13,12 +13,15 @@
# limitations under the License.
"""Contains definitions of Atrous Spatial Pyramid Pooling (ASPP) decoder."""
from typing import Any, List, Optional, Mapping
from typing import Any, List, Mapping, Optional
# Import libraries
import tensorflow as tf
from official.modeling import hyperparams
from official.vision import keras_cv
from official.vision.beta.modeling.decoders import factory
@tf.keras.utils.register_keras_serializable(package='Vision')
......@@ -128,3 +131,46 @@ class ASPP(tf.keras.layers.Layer):
@classmethod
def from_config(cls, config, custom_objects=None):
  """Creates an instance of this class from a config mapping.

  Args:
    config: A mapping of constructor keyword arguments; it is expanded as
      `cls(**config)`.
    custom_objects: Accepted for signature compatibility; it is not consulted
      by this implementation.

  Returns:
    A new instance of `cls`.
  """
  del custom_objects  # Not needed to rebuild the object.
  instance = cls(**config)
  return instance
@factory.register_decoder_builder('aspp')
def build_aspp_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.layers.Layer:
  """Builds ASPP decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone. Note this is for consistent
      interface, and is not used by ASPP decoder.
    model_config: Model config. Its `decoder` field selects the decoder type
      (read through `.type` and `.get()`), and its `norm_activation` field
      supplies the normalization and activation settings.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None. Passed to the decoder as its kernel regularizer.

  Returns:
    An `ASPP` instance. `ASPP` is a `tf.keras.layers.Layer`, not a
    `tf.keras.Model`.

  Raises:
    ValueError: If the model_config.decoder.type is not `aspp`.
  """
  del input_specs  # input_specs is not used by ASPP decoder.

  decoder_type = model_config.decoder.type
  decoder_cfg = model_config.decoder.get()
  if decoder_type != 'aspp':
    raise ValueError(f'Inconsistent decoder type {decoder_type}. '
                     'Need to be `aspp`.')

  norm_activation_config = model_config.norm_activation
  return ASPP(
      level=decoder_cfg.level,
      dilation_rates=decoder_cfg.dilation_rates,
      num_filters=decoder_cfg.num_filters,
      pool_kernel_size=decoder_cfg.pool_kernel_size,
      dropout_rate=decoder_cfg.dropout_rate,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      activation=norm_activation_config.activation,
      kernel_regularizer=l2_regularizer)
......@@ -12,80 +12,124 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains the factory method to create decoders."""
"""Decoder registers and factory method.
from typing import Mapping, Optional
One can register a new decoder model by the following two steps:
1 Import the factory and register the builder in the decoder file.
2 Import the decoder class in decoders/__init__.py.
```
# my_decoder.py
from modeling.decoders import factory
class MyDecoder():
...
@factory.register_decoder_builder('my_decoder')
def build_my_decoder():
return MyDecoder()
# decoders/__init__.py adds import
from modeling.decoders.my_decoder import MyDecoder
```
If the MyDecoder class should be available only to specific binaries,
do not import the decoder module in decoders/__init__.py; import it
in the place that uses it instead.
"""
from typing import Any, Callable, Mapping, Optional, Union
# Import libraries
import tensorflow as tf
from official.core import registry
from official.modeling import hyperparams
from official.vision.beta.modeling import decoders
_REGISTERED_DECODER_CLS = {}
def register_decoder_builder(key: str) -> Callable[..., Any]:
  """Returns a decorator that registers a decoder builder under `key`.

  The decorated builder should be a callable (a class or a function).
  Typical usage:

  ```
  class MyDecoder(tf.keras.Model):
    pass

  @register_decoder_builder('mydecoder')
  def builder(input_specs, config, l2_reg):
    return MyDecoder(...)

  # Builds a MyDecoder object.
  my_decoder = build_decoder(input_specs, config, l2_reg)
  ```

  Args:
    key: A `str` of key to look up the builder.

  Returns:
    The callable produced by `registry.register` for the decoder registry and
    `key`, intended to be applied as a decorator to the builder.
  """
  decorator = registry.register(_REGISTERED_DECODER_CLS, key)
  return decorator
@register_decoder_builder('identity')
def build_identity(
    input_specs: Optional[Mapping[str, tf.TensorShape]] = None,
    model_config: Optional[hyperparams.Config] = None,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None) -> None:
  """Builder registered for the `identity` decoder type.

  None of the arguments are used; they are accepted only so that this builder
  has the same interface as the other decoder builders.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A `OneOfConfig` of model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
      None.

  Returns:
    Always `None`; no decoder object is created for the identity type.
  """
  # Discard the arguments explicitly: they exist for interface parity only.
  del input_specs, model_config, l2_regularizer
  return None
def build_decoder(
input_specs: Mapping[str, tf.TensorShape],
model_config: hyperparams.Config,
l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
l2_regularizer: tf.keras.regularizers.Regularizer = None,
**kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:
"""Builds decoder from a config.
A decoder can be a keras.Model, a keras.layers.Layer, or None. If it is not
None, the decoder will take features from the backbone as input and generate
decoded feature maps. If it is None, such as an identity decoder, the decoder
is skipped and features from the backbone are regarded as model output.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
model_config: A OneOfConfig. Model config.
l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
model_config: A `OneOfConfig` of model config.
l2_regularizer: A `tf.keras.regularizers.Regularizer` object. Default to
None.
**kwargs: Additional keyword args to be passed to decoder builder.
Returns:
A `tf.keras.Model` instance of the decoder.
An instance of the decoder.
"""
decoder_type = model_config.decoder.type
decoder_cfg = model_config.decoder.get()
norm_activation_config = model_config.norm_activation
if decoder_type == 'identity':
decoder = None
elif decoder_type == 'fpn':
decoder = decoders.FPN(
input_specs=input_specs,
min_level=model_config.min_level,
max_level=model_config.max_level,
num_filters=decoder_cfg.num_filters,
use_separable_conv=decoder_cfg.use_separable_conv,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
elif decoder_type == 'nasfpn':
decoder = decoders.NASFPN(
input_specs=input_specs,
min_level=model_config.min_level,
max_level=model_config.max_level,
num_filters=decoder_cfg.num_filters,
num_repeats=decoder_cfg.num_repeats,
use_separable_conv=decoder_cfg.use_separable_conv,
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
elif decoder_type == 'aspp':
decoder = decoders.ASPP(
level=decoder_cfg.level,
dilation_rates=decoder_cfg.dilation_rates,
num_filters=decoder_cfg.num_filters,
pool_kernel_size=decoder_cfg.pool_kernel_size,
dropout_rate=decoder_cfg.dropout_rate,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
activation=norm_activation_config.activation,
kernel_regularizer=l2_regularizer)
else:
raise ValueError('Decoder {!r} not implement'.format(decoder_type))
return decoder
decoder_builder = registry.lookup(_REGISTERED_DECODER_CLS,
model_config.decoder.type)
return decoder_builder(
input_specs=input_specs,
model_config=model_config,
l2_regularizer=l2_regularizer,
**kwargs)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for decoder factory functions."""
from absl.testing import parameterized
import tensorflow as tf
from tensorflow.python.distribute import combinations
from official.vision.beta import configs
from official.vision.beta.configs import decoders as decoders_cfg
from official.vision.beta.modeling import decoders
from official.vision.beta.modeling.decoders import factory
class FactoryTest(tf.test.TestCase, parameterized.TestCase):
  """Checks that factory-built decoders match directly constructed ones."""

  @staticmethod
  def _backbone_specs(min_level, max_level):
    """Returns {level: TensorShape} for levels in [min_level, max_level)."""
    return {
        str(level): tf.TensorShape(
            [1, 128 // (2**level), 128 // (2**level), 3])
        for level in range(min_level, max_level)
    }

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256], use_separable_conv=[True, False]))
  def test_fpn_decoder_creation(self, num_filters, use_separable_conv):
    """FPN from the factory has the same config as a hand-built FPN."""
    min_level, max_level = 3, 7
    input_specs = self._backbone_specs(min_level, max_level)

    # Reference decoder, constructed directly.
    reference = decoders.FPN(
        input_specs=input_specs,
        num_filters=num_filters,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    # Equivalent decoder, constructed through the factory from a config.
    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='fpn',
        fpn=decoders_cfg.FPN(
            num_filters=num_filters, use_separable_conv=use_separable_conv))
    built = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    self.assertEqual(reference.get_config(), built.get_config())

  @combinations.generate(
      combinations.combine(
          num_filters=[128, 256],
          num_repeats=[3, 5],
          use_separable_conv=[True, False]))
  def test_nasfpn_decoder_creation(self, num_filters, num_repeats,
                                   use_separable_conv):
    """NASFPN from the factory has the same config as a hand-built NASFPN."""
    min_level, max_level = 3, 7
    input_specs = self._backbone_specs(min_level, max_level)

    # Reference decoder, constructed directly.
    reference = decoders.NASFPN(
        input_specs=input_specs,
        num_filters=num_filters,
        num_repeats=num_repeats,
        use_separable_conv=use_separable_conv,
        use_sync_bn=True)

    # Equivalent decoder, constructed through the factory from a config.
    model_config = configs.retinanet.RetinaNet()
    model_config.min_level = min_level
    model_config.max_level = max_level
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='nasfpn',
        nasfpn=decoders_cfg.NASFPN(
            num_filters=num_filters,
            num_repeats=num_repeats,
            use_separable_conv=use_separable_conv))
    built = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    self.assertEqual(reference.get_config(), built.get_config())

  @combinations.generate(
      combinations.combine(
          level=[3, 4],
          dilation_rates=[[6, 12, 18], [6, 12]],
          num_filters=[128, 256]))
  def test_aspp_decoder_creation(self, level, dilation_rates, num_filters):
    """ASPP from the factory has the same config as a hand-built ASPP."""
    input_specs = {'1': tf.TensorShape([1, 128, 128, 3])}

    # Reference decoder, constructed directly.
    reference = decoders.ASPP(
        level=level,
        dilation_rates=dilation_rates,
        num_filters=num_filters,
        use_sync_bn=True)

    # Equivalent decoder, constructed through the factory from a config.
    model_config = configs.semantic_segmentation.SemanticSegmentationModel()
    model_config.num_classes = 10
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='aspp',
        aspp=decoders_cfg.ASPP(
            level=level, dilation_rates=dilation_rates,
            num_filters=num_filters))
    built = factory.build_decoder(
        input_specs=input_specs, model_config=model_config)

    self.assertEqual(reference.get_config(), built.get_config())

  def test_identity_decoder_creation(self):
    """The factory yields None for the identity decoder type."""
    model_config = configs.retinanet.RetinaNet()
    model_config.num_classes = 2
    model_config.input_size = [None, None, 3]
    model_config.decoder = decoders_cfg.Decoder(
        type='identity', identity=decoders_cfg.Identity())

    built = factory.build_decoder(
        input_specs=None, model_config=model_config)

    self.assertIsNone(built)


if __name__ == '__main__':
  tf.test.main()
......@@ -16,9 +16,12 @@
from typing import Any, Mapping, Optional
# Import libraries
import tensorflow as tf
from official.modeling import hyperparams
from official.modeling import tf_utils
from official.vision.beta.modeling.decoders import factory
from official.vision.beta.ops import spatial_transform_ops
......@@ -187,3 +190,43 @@ class FPN(tf.keras.Model):
def output_specs(self) -> Mapping[str, tf.TensorShape]:
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
@factory.register_decoder_builder('fpn')
def build_fpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Creates an FPN decoder from a model config.

  Args:
    input_specs: A `dict` of {level: TensorShape} describing the backbone
      outputs; forwarded to `FPN`.
    model_config: Model config. Supplies `min_level`, `max_level`, the selected
      decoder config (via `decoder.type` / `decoder.get()`) and the
      `norm_activation` settings.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance used as the
      kernel regularizer. Default to None.

  Returns:
    A `tf.keras.Model` instance of the FPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `fpn`.
  """
  selected_type = model_config.decoder.type
  fpn_cfg = model_config.decoder.get()
  if selected_type != 'fpn':
    raise ValueError(f'Inconsistent decoder type {selected_type}. '
                     'Need to be `fpn`.')

  norm_act = model_config.norm_activation
  fpn_kwargs = dict(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=fpn_cfg.num_filters,
      use_separable_conv=fpn_cfg.use_separable_conv,
      activation=norm_act.activation,
      use_sync_bn=norm_act.use_sync_bn,
      norm_momentum=norm_act.norm_momentum,
      norm_epsilon=norm_act.norm_epsilon,
      kernel_regularizer=l2_regularizer,
  )
  return FPN(**fpn_kwargs)
......@@ -19,6 +19,7 @@
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.modeling.backbones import mobilenet
from official.vision.beta.modeling.backbones import resnet
from official.vision.beta.modeling.decoders import fpn
......@@ -52,6 +53,33 @@ class FPNTest(parameterized.TestCase, tf.test.TestCase):
[1, input_size // 2**level, input_size // 2**level, 256],
feats[str(level)].shape.as_list())
@parameterized.parameters(
    (256, 3, 7, False),
    (256, 3, 7, True),
)
def test_network_creation_with_mobilenet(self, input_size, min_level,
                                         max_level, use_separable_conv):
  """Builds FPN on a MobileNetV2 backbone and checks every output level."""
  tf.keras.backend.set_image_data_format('channels_last')

  images = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)

  backbone = mobilenet.MobileNet(model_id='MobileNetV2')
  network = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      use_separable_conv=use_separable_conv)

  feats = network(backbone(images))

  # Every level in [min_level, max_level] must be present, with spatial size
  # input_size / 2**level and 256 channels.
  for level in range(min_level, max_level + 1):
    key = str(level)
    side = input_size // 2**level
    self.assertIn(key, feats)
    self.assertAllEqual([1, side, side, 256], feats[key].shape.as_list())
def test_serialize_deserialize(self):
# Create a network object that sets all of its config options.
kwargs = dict(
......
......@@ -13,12 +13,16 @@
# limitations under the License.
"""Contains definitions of NAS-FPN."""
from typing import Any, Mapping, List, Tuple, Optional
from typing import Any, List, Mapping, Optional, Tuple
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import hyperparams
from official.vision.beta.modeling.decoders import factory
from official.vision.beta.ops import spatial_transform_ops
......@@ -316,3 +320,45 @@ class NASFPN(tf.keras.Model):
def output_specs(self) -> Mapping[str, tf.TensorShape]:
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
@factory.register_decoder_builder('nasfpn')
def build_nasfpn_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Creates a NASFPN decoder from a model config.

  Args:
    input_specs: A `dict` of {level: TensorShape} describing the backbone
      outputs; forwarded to `NASFPN`.
    model_config: Model config. Supplies `min_level`, `max_level`, the selected
      decoder config (via `decoder.type` / `decoder.get()`) and the
      `norm_activation` settings.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance used as the
      kernel regularizer. Default to None.

  Returns:
    A `tf.keras.Model` instance of the NASFPN decoder.

  Raises:
    ValueError: If the model_config.decoder.type is not `nasfpn`.
  """
  selected_type = model_config.decoder.type
  nasfpn_cfg = model_config.decoder.get()
  if selected_type != 'nasfpn':
    raise ValueError(f'Inconsistent decoder type {selected_type}. '
                     'Need to be `nasfpn`.')

  norm_act = model_config.norm_activation
  nasfpn_kwargs = dict(
      input_specs=input_specs,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_filters=nasfpn_cfg.num_filters,
      num_repeats=nasfpn_cfg.num_repeats,
      use_separable_conv=nasfpn_cfg.use_separable_conv,
      activation=norm_act.activation,
      use_sync_bn=norm_act.use_sync_bn,
      norm_momentum=norm_act.norm_momentum,
      norm_epsilon=norm_act.norm_epsilon,
      kernel_regularizer=l2_regularizer,
  )
  return NASFPN(**nasfpn_kwargs)
......@@ -24,10 +24,10 @@ from official.vision.beta.configs import retinanet as retinanet_cfg
from official.vision.beta.configs import semantic_segmentation as segmentation_cfg
from official.vision.beta.modeling import backbones
from official.vision.beta.modeling import classification_model
from official.vision.beta.modeling import decoders
from official.vision.beta.modeling import maskrcnn_model
from official.vision.beta.modeling import retinanet_model
from official.vision.beta.modeling import segmentation_model
from official.vision.beta.modeling.decoders import factory as decoder_factory
from official.vision.beta.modeling.heads import dense_prediction_heads
from official.vision.beta.modeling.heads import instance_heads
from official.vision.beta.modeling.heads import segmentation_heads
......@@ -78,7 +78,7 @@ def build_maskrcnn(
l2_regularizer=l2_regularizer)
backbone(tf.keras.Input(input_specs.shape[1:]))
decoder = decoder_factory.build_decoder(
decoder = decoders.factory.build_decoder(
input_specs=backbone.output_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
......@@ -253,7 +253,7 @@ def build_retinanet(
l2_regularizer=l2_regularizer)
backbone(tf.keras.Input(input_specs.shape[1:]))
decoder = decoder_factory.build_decoder(
decoder = decoders.factory.build_decoder(
input_specs=backbone.output_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
......@@ -313,7 +313,7 @@ def build_segmentation_model(
norm_activation_config=norm_activation_config,
l2_regularizer=l2_regularizer)
decoder = decoder_factory.build_decoder(
decoder = decoders.factory.build_decoder(
input_specs=backbone.output_specs,
model_config=model_config,
l2_regularizer=l2_regularizer)
......
......@@ -68,6 +68,39 @@ def round_filters(filters: int,
return int(new_filters)
def hard_swish(x: tf.Tensor) -> tf.Tensor:
  """Applies the Swish6/H-Swish activation: x * relu6(x + 3) / 6.

  Reference: Section 5.2 of Howard et al. "Searching for MobileNet V3."
  https://arxiv.org/pdf/1905.02244.pdf

  Args:
    x: the input tensor.

  Returns:
    The activation output.
  """
  gate = tf.nn.relu6(x + 3.)
  return x * gate * (1. / 6.)


# Make the activation resolvable by its string name in Keras.
tf.keras.utils.get_custom_objects().update({'hard_swish': hard_swish})
def simple_swish(x: tf.Tensor) -> tf.Tensor:
  """Applies swish/silu as a plain x * sigmoid(x), with no custom gradient.

  Useful for exporting to SavedModel to avoid custom gradient warnings.

  Args:
    x: the input tensor.

  Returns:
    The activation output.
  """
  gate = tf.math.sigmoid(x)
  return x * gate


# Make the activation resolvable by its string name in Keras.
tf.keras.utils.get_custom_objects().update({'simple_swish': simple_swish})
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitation(tf.keras.layers.Layer):
"""Creates a squeeze and excitation layer."""
......@@ -706,9 +739,10 @@ class CausalConvMixin:
self._use_buffered_input = variable
def _compute_buffered_causal_padding(self,
inputs: Optional[tf.Tensor] = None,
inputs: tf.Tensor,
use_buffered_input: bool = False,
time_axis: int = 1) -> List[List[int]]:
time_axis: int = 1,
) -> List[List[int]]:
"""Calculates padding for 'causal' option for conv layers.
Args:
......@@ -720,7 +754,7 @@ class CausalConvMixin:
Returns:
A list of paddings for `tf.pad`.
"""
del inputs
input_shape = tf.shape(inputs)[1:-1]
if tf.keras.backend.image_data_format() == 'channels_first':
raise ValueError('"channels_first" mode is unsupported.')
......@@ -730,7 +764,10 @@ class CausalConvMixin:
(self.kernel_size[i] - 1) * (self.dilation_rate[i] - 1))
for i in range(self.rank)
]
pad_total = [kernel_size_effective[i] - 1 for i in range(self.rank)]
pad_total = [kernel_size_effective[0] - 1]
for i in range(1, self.rank):
overlap = (input_shape[i] - 1) % self.strides[i] + 1
pad_total.append(tf.maximum(kernel_size_effective[i] - overlap, 0))
pad_beg = [pad_total[i] // 2 for i in range(self.rank)]
pad_end = [pad_total[i] - pad_beg[i] for i in range(self.rank)]
padding = [[pad_beg[i], pad_end[i]] for i in range(self.rank)]
......@@ -763,7 +800,8 @@ class CausalConvMixin:
# across time should be the input shape minus any padding, assuming
# the stride across time is 1.
if self._use_buffered_input and spatial_output_shape[0] is not None:
padding = self._compute_buffered_causal_padding(use_buffered_input=False)
padding = self._compute_buffered_causal_padding(
tf.zeros([1] + spatial_output_shape + [1]), use_buffered_input=False)
spatial_output_shape[0] -= sum(padding[1])
return spatial_output_shape
......@@ -911,15 +949,13 @@ class Conv3D(tf.keras.layers.Conv3D, CausalConvMixin):
base_config = super(Conv3D, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def build(self, input_shape):
"""Builds the layer with the given input shape."""
super(Conv3D, self).build(input_shape)
# TODO(b/177662019): tf.nn.conv3d with depthwise kernels on CPU
# in eager mode may produce incorrect output or cause a segfault.
# To avoid this issue, compile the op to TF graph using tf.function.
self._convolution_op = tf.function(
self._convolution_op, experimental_compile=True)
def call(self, inputs):
"""Call the layer with the given inputs."""
# Note: tf.nn.conv3d with depthwise kernels on CPU is currently only
# supported when compiling with TF graph (XLA) using tf.function, so it
# is compiled by default here (b/186463870).
conv_fn = tf.function(super(Conv3D, self).call, jit_compile=True)
return conv_fn(inputs)
def _compute_causal_padding(self, inputs):
"""Computes causal padding dimensions for the given inputs."""
......
......@@ -24,6 +24,11 @@ from official.vision.beta.modeling.layers import nn_layers
class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
def test_hard_swish(self):
  """The 'hard_swish' Keras activation gives the expected values."""
  inputs = tf.constant([-3, -1.5, 0, 3])
  hard_swish_layer = tf.keras.layers.Activation('hard_swish')
  self.assertAllEqual(hard_swish_layer(inputs), [0., -0.375, 0., 3.])
def test_scale(self):
scale = nn_layers.Scale(initializer=tf.keras.initializers.constant(10.))
output = scale(3.)
......@@ -274,14 +279,14 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
predicted = conv3d(padded_inputs)
expected = tf.constant(
[[[[[12., 12., 12.],
[[[[[27., 27., 27.],
[18., 18., 18.]],
[[18., 18., 18.],
[27., 27., 27.]]],
[[[24., 24., 24.],
[12., 12., 12.]]],
[[[54., 54., 54.],
[36., 36., 36.]],
[[36., 36., 36.],
[54., 54., 54.]]]]])
[24., 24., 24.]]]]])
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
......@@ -311,14 +316,17 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
predicted = conv3d(padded_inputs)
expected = tf.constant(
[[[[[4.0, 4.0, 4.0],
[[[[[9.0, 9.0, 9.0],
[6.0, 6.0, 6.0]],
[[6.0, 6.0, 6.0],
[9.0, 9.0, 9.0]]],
[[[8.0, 8.0, 8.0],
[4.0, 4.0, 4.0]]],
[[[18.0, 18.0, 18.0],
[12., 12., 12.]],
[[12., 12., 12.],
[18., 18., 18.]]]]])
[8., 8., 8.]]]]])
output_shape = conv3d._spatial_output_shape([4, 4, 4])
self.assertAllClose(output_shape, [2, 2, 2])
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
......@@ -329,5 +337,74 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
def test_conv3d_causal_padding_2d(self):
  """Causal padding matches Keras 'same' padding for a (1, 3, 3) kernel."""
  # Settings shared by the layer under test and the Keras reference layer.
  shared_kwargs = dict(
      filters=1,
      kernel_size=(1, 3, 3),
      strides=(1, 2, 2),
      kernel_initializer='ones',
      use_bias=False,
  )
  conv3d = nn_layers.Conv3D(
      padding='causal', use_buffered_input=False, **shared_kwargs)
  keras_conv3d = tf.keras.layers.Conv3D(padding='same', **shared_kwargs)

  inputs = tf.ones([1, 1, 4, 4, 1])

  predicted = conv3d(inputs)
  expected = keras_conv3d(inputs)

  self.assertEqual(predicted.shape, expected.shape)
  self.assertAllClose(predicted, expected)

  self.assertAllClose(predicted,
                      [[[[[9.],
                          [6.]],
                         [[6.],
                          [4.]]]]])
def test_conv3d_causal_padding_1d(self):
  """Causal padding along time matches Keras Conv1D 'causal' padding."""
  conv3d = nn_layers.Conv3D(
      filters=1,
      kernel_size=(3, 1, 1),
      strides=(2, 1, 1),
      padding='causal',
      use_buffered_input=False,
      kernel_initializer='ones',
      use_bias=False,
  )
  keras_conv1d = tf.keras.layers.Conv1D(
      filters=1,
      kernel_size=3,
      strides=2,
      padding='causal',
      kernel_initializer='ones',
      use_bias=False,
  )

  inputs = tf.ones([1, 4, 1, 1, 1])
  # Conv1D takes [batch, time, channels], so drop the two unit spatial axes.
  inputs_1d = tf.squeeze(inputs, axis=[2, 3])

  predicted = conv3d(inputs)
  # Restore the 5-D layout so both results can be compared directly.
  expected = tf.reshape(keras_conv1d(inputs_1d), [1, 2, 1, 1, 1])

  self.assertEqual(predicted.shape, expected.shape)
  self.assertAllClose(predicted, expected)

  self.assertAllClose(predicted,
                      [[[[[1.]]],
                        [[[3.]]]]])
if __name__ == '__main__':
tf.test.main()
......@@ -16,7 +16,6 @@
from typing import Any, List, Mapping, Optional, Union
# Import libraries
import tensorflow as tf
from official.vision.beta.ops import anchor
......@@ -147,14 +146,18 @@ class MaskRCNNModel(tf.keras.Model):
model_outputs = {}
# Feature extraction.
features = self.backbone(images)
backbone_features = self.backbone(images)
if self.decoder:
features = self.decoder(features)
features = self.decoder(backbone_features)
else:
features = backbone_features
# Region proposal network.
rpn_scores, rpn_boxes = self.rpn_head(features)
model_outputs.update({
'backbone_features': backbone_features,
'decoder_features': features,
'rpn_boxes': rpn_boxes,
'rpn_scores': rpn_scores
})
......
......@@ -1205,7 +1205,8 @@ class RandAugment(ImageAugment):
self.magnitude = float(magnitude)
self.cutout_const = float(cutout_const)
self.translate_const = float(translate_const)
self.prob_to_apply = prob_to_apply
self.prob_to_apply = (
float(prob_to_apply) if prob_to_apply is not None else None)
self.available_ops = [
'AutoContrast', 'Equalize', 'Invert', 'Rotate', 'Posterize', 'Solarize',
'Color', 'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'ShearY',
......
......@@ -198,7 +198,8 @@ def multilevel_crop_and_resize(features,
# Assigns boxes to the right level.
box_width = boxes[:, :, 3] - boxes[:, :, 1]
box_height = boxes[:, :, 2] - boxes[:, :, 0]
areas_sqrt = tf.cast(tf.sqrt(box_height * box_width), tf.float32)
areas_sqrt = tf.sqrt(
tf.cast(box_height, tf.float32) * tf.cast(box_width, tf.float32))
levels = tf.cast(
tf.math.floordiv(
tf.math.log(tf.divide(areas_sqrt, 224.0)),
......@@ -456,6 +457,12 @@ def crop_mask_in_target_box(masks,
[batch_size, num_boxes, output_size, output_size].
"""
with tf.name_scope('crop_mask_in_target_box'):
# Cast to float32, as the y_transform and other transform variables may
# overflow in float16
masks = tf.cast(masks, tf.float32)
boxes = tf.cast(boxes, tf.float32)
target_boxes = tf.cast(target_boxes, tf.float32)
batch_size, num_masks, height, width = masks.get_shape().as_list()
if batch_size is None:
batch_size = tf.shape(masks)[0]
......@@ -504,18 +511,22 @@ def crop_mask_in_target_box(masks,
return cropped_masks
def nearest_upsampling(data, scale):
def nearest_upsampling(data, scale, use_keras_layer=False):
"""Nearest neighbor upsampling implementation.
Args:
data: A tensor with a shape of [batch, height_in, width_in, channels].
scale: An integer multiple to scale resolution of input data.
use_keras_layer: If True, use keras Upsampling2D layer.
Returns:
data_up: A tensor with a shape of
[batch, height_in*scale, width_in*scale, channels]. Same dtype as input
data.
"""
if use_keras_layer:
return tf.keras.layers.UpSampling2D(size=(scale, scale),
interpolation='nearest')(data)
with tf.name_scope('nearest_upsampling'):
bs, _, _, c = data.get_shape().as_list()
shape = tf.shape(input=data)
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example experiment configuration definition."""
from typing import List
import dataclasses
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
@dataclasses.dataclass
class ExampleDataConfig(cfg.DataConfig):
  """Input data config for the example task. Add more fields as needed.

  `ExampleTask` uses this config for both its `train_data` and
  `validation_data` fields.
  """
  # Path or glob pattern of the input files (e.g. '/path/to/train*').
  input_path: str = ''
  # Batch size; defaults to 0 and is set by the experiment definition.
  global_batch_size: int = 0
  # True for training data, False for validation data.
  is_training: bool = True
  # Name of the data type as a string.
  dtype: str = 'float32'
  # NOTE(review): the three fields below are presumably consumed by the shared
  # input reader; confirm against cfg.DataConfig.
  shuffle_buffer_size: int = 10000
  cycle_length: int = 10
  file_type: str = 'tfrecord'
@dataclasses.dataclass
class ExampleModel(hyperparams.Config):
  """The model config. Used by build_example_model function."""
  # Number of output classes; defaults to 0 and is set by the experiment.
  num_classes: int = 0
  # Input size as a list of ints, e.g. [128, 128, 3] (presumably
  # [height, width, channels]; confirm against the model builder).
  input_size: List[int] = dataclasses.field(default_factory=list)
@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Loss-related hyperparameters of the example task."""
  # L2 weight-decay coefficient; 0.0 by default.
  l2_weight_decay: float = 0.0
@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  """Evaluation-related hyperparameters of the example task."""
  # The k of a top-k metric (presumably top-k accuracy; confirm against the
  # task implementation).
  top_k: int = 5
@dataclasses.dataclass
class ExampleTask(cfg.TaskConfig):
  """The task config.

  Sub-configs are created through `default_factory` rather than as
  class-level instances: dataclass instances are unhashable, and Python 3.11+
  rejects unhashable field defaults with a `ValueError`. A factory also gives
  every `ExampleTask` its own sub-config objects instead of sharing one
  default instance.
  """
  # Model hyperparameters.
  model: ExampleModel = dataclasses.field(default_factory=ExampleModel)
  # Input config for the training split.
  train_data: ExampleDataConfig = dataclasses.field(
      default_factory=lambda: ExampleDataConfig(is_training=True))
  # Input config for the validation split.
  validation_data: ExampleDataConfig = dataclasses.field(
      default_factory=lambda: ExampleDataConfig(is_training=False))
  # Loss hyperparameters.
  losses: Losses = dataclasses.field(default_factory=Losses)
  # Evaluation hyperparameters.
  evaluation: Evaluation = dataclasses.field(default_factory=Evaluation)
@exp_factory.register_config_factory('tf_vision_example_experiment')
def tf_vision_example_experiment() -> cfg.ExperimentConfig:
  """Builds the full example experiment config.

  Registered with the experiment factory under the name
  'tf_vision_example_experiment'.

  Returns:
    A `cfg.ExperimentConfig` holding the task, trainer and restriction
    settings of the example experiment.
  """
  train_batch_size = 256
  eval_batch_size = 256
  steps_per_epoch = 10

  # Task: model, losses and the two input pipelines.
  task_config = ExampleTask(
      model=ExampleModel(num_classes=10, input_size=[128, 128, 3]),
      losses=Losses(l2_weight_decay=1e-4),
      train_data=ExampleDataConfig(
          input_path='/path/to/train*',
          is_training=True,
          global_batch_size=train_batch_size),
      validation_data=ExampleDataConfig(
          input_path='/path/to/valid*',
          is_training=False,
          global_batch_size=eval_batch_size))

  # Optimizer: SGD with momentum, cosine learning-rate schedule, linear warmup.
  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'sgd',
          'sgd': {
              'momentum': 0.9
          }
      },
      'learning_rate': {
          'type': 'cosine',
          'cosine': {
              'initial_learning_rate': 1.6,
              'decay_steps': 350 * steps_per_epoch
          }
      },
      'warmup': {
          'type': 'linear',
          'linear': {
              'warmup_steps': 5 * steps_per_epoch,
              'warmup_learning_rate': 0
          }
      }
  })

  # Trainer: every periodic interval is one "epoch" of `steps_per_epoch` steps.
  trainer_config = cfg.TrainerConfig(
      steps_per_loop=steps_per_epoch,
      summary_interval=steps_per_epoch,
      checkpoint_interval=steps_per_epoch,
      train_steps=90 * steps_per_epoch,
      validation_steps=steps_per_epoch,
      validation_interval=steps_per_epoch,
      optimizer_config=optimizer_config)

  return cfg.ExperimentConfig(
      task=task_config,
      trainer=trainer_config,
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
# Example experiment configuration overrides (global batch size 64).
task:
  # Model settings.
  model:
    num_classes: 1001
    input_size: [128, 128, 3]
  # Training input pipeline.
  train_data:
    input_path: 'imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 64
    dtype: 'bfloat16'
  # Validation input pipeline.
  validation_data:
    input_path: 'imagenet-2012-tfrecord/valid*'
    is_training: false
    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
# Training loop settings; all periodic intervals are 312 steps.
trainer:
  train_steps: 62400
  validation_steps: 13
  validation_interval: 312
  steps_per_loop: 312
  summary_interval: 312
  checkpoint_interval: 312
  optimizer_config:
    # SGD with momentum.
    optimizer:
      type: 'sgd'
      sgd:
        momentum: 0.9
    # Piecewise-constant learning rate: values[i] applies up to boundaries[i],
    # and the last value applies afterwards.
    learning_rate:
      type: 'stepwise'
      stepwise:
        boundaries: [18750, 37500, 50000]
        values: [0.1, 0.01, 0.001, 0.0001]
# Runtime settings: distribution strategy and mixed-precision dtype.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
# Example experiment configuration overrides (global batch size 4096).
task:
  # Model settings.
  model:
    num_classes: 1001
    input_size: [128, 128, 3]
  # Training input pipeline.
  train_data:
    input_path: 'imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 4096
    dtype: 'bfloat16'
  # Validation input pipeline.
  validation_data:
    input_path: 'imagenet-2012-tfrecord/valid*'
    is_training: false
    global_batch_size: 4096
    dtype: 'bfloat16'
    drop_remainder: false
# Training loop settings; all periodic intervals are 312 steps.
trainer:
  train_steps: 62400
  validation_steps: 13
  validation_interval: 312
  steps_per_loop: 312
  summary_interval: 312
  checkpoint_interval: 312
  optimizer_config:
    # SGD with momentum.
    optimizer:
      type: 'sgd'
      sgd:
        momentum: 0.9
    # Piecewise-constant learning rate: values[i] applies up to boundaries[i],
    # and the last value applies afterwards.
    learning_rate:
      type: 'stepwise'
      stepwise:
        boundaries: [18750, 37500, 50000]
        values: [0.1, 0.01, 0.001, 0.0001]
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example classification decoder and parser.
This file defines the Decoder and Parser to load data. The example is shown on
loading standard tf.Example data but non-standard tf.Example or other data
format can be supported by implementing proper decoder and parser.
"""
from typing import Mapping, List, Tuple
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import decoder
from official.vision.beta.dataloaders import parser
from official.vision.beta.ops import preprocess_ops
# Per-channel offset and scale passed to `preprocess_ops.normalize_image`,
# expressed on a 0-255 pixel scale (presumably the usual ImageNet RGB mean and
# standard deviation; confirm against the dataset).
MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)
STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
class Decoder(decoder.Decoder):
  """A tf.Example decoder for classification task."""

  def __init__(self):
    """Sets up the feature spec used to parse each tf.Example.

    The spec maps a field name to the way its value is read from the input
    tf.Example. Two fields are declared here, the encoded image bytes and the
    class label, but any number of fields may be added.
    """
    encoded_image_spec = tf.io.FixedLenFeature(
        (), tf.string, default_value='')
    class_label_spec = tf.io.FixedLenFeature((), tf.int64, default_value=-1)
    self._keys_to_features = {
        'image/encoded': encoded_image_spec,
        'image/class/label': class_label_spec,
    }

  def decode(self,
             serialized_example: tf.train.Example) -> Mapping[str, tf.Tensor]:
    """Parses one serialized tf.Example into a dictionary of tensors.

    The result is what `_parse_train_data` and `_parse_eval_data` of the
    Parser receive as input.

    Args:
      serialized_example: A serialized tf.Example.

    Returns:
      A dictionary mapping each declared field name to its decoded tensor.
    """
    decoded_tensors = tf.io.parse_single_example(
        serialized_example, self._keys_to_features)
    return decoded_tensors
class Parser(parser.Parser):
  """Parser to parse an image and its annotations.

  To define own Parser, client should override _parse_train_data and
  _parse_eval_data functions, where decoded tensors are parsed with optional
  pre-processing steps. The output from the two functions can be any structure
  like tuple, list or dictionary.
  """

  def __init__(self, output_size: List[int], num_classes: int):
    """Initializes parameters for parsing annotations in the dataset.

    This example only takes two arguments but one can freely add as many
    arguments as needed. For example, pre-processing and augmentations usually
    happen in Parser, and related parameters can be passed in by this
    constructor.

    Args:
      output_size: A `list` or `tuple` of two ints, [height, width] of the
        output image.
      num_classes: `int`, number of classes.
    """
    self._output_size = output_size
    self._num_classes = num_classes
    self._dtype = tf.float32

  def _parse_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Decodes, resizes and normalizes one example.

    Shared by the training and evaluation paths, which apply identical
    processing in this example.

    Args:
      decoded_tensors: A dictionary of field key name and decoded tensor mapping
        from Decoder.

    Returns:
      A tuple of (image, label) tensors.
    """
    label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
    image_bytes = decoded_tensors['image/encoded']
    image = tf.io.decode_jpeg(image_bytes, channels=3)
    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    # Convert to a list first so a tuple `output_size` does not raise a
    # TypeError when the channel dimension is appended.
    image = tf.ensure_shape(image, list(self._output_size) + [3])

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(
        image, offset=MEAN_RGB, scale=STDDEV_RGB)

    image = tf.image.convert_image_dtype(image, self._dtype)
    return image, label

  def _parse_train_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Parses data for training.

    Args:
      decoded_tensors: A dictionary of field key name and decoded tensor mapping
        from Decoder.

    Returns:
      A tuple of (image, label) tensors.
    """
    return self._parse_data(decoded_tensors)

  def _parse_eval_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Parses data for evaluation.

    Args:
      decoded_tensors: A dictionary of field key name and decoded tensor mapping
        from Decoder.

    Returns:
      A tuple of (image, label) tensors.
    """
    return self._parse_data(decoded_tensors)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment