Unverified Commit 5ffcc5b6 authored by Anirudh Vegesana's avatar Anirudh Vegesana Committed by GitHub
Browse files

Merge branch 'purdue-yolo' into detection_generator_pr

parents 0b81a843 76e0c014
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tfds factory functions."""
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.dataloaders import decoder as base_decoder
from official.vision.beta.dataloaders import tfds_factory
class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
  """Tests decoder factory lookup for supported and unsupported TFDS names."""

  @parameterized.parameters(
      ('imagenet2012'),
      ('cifar10'),
      ('cifar100'),
  )
  def test_classification_decoder(self, tfds_name):
    """Supported classification datasets yield a `Decoder` instance."""
    decoder = tfds_factory.get_classification_decoder(tfds_name)
    self.assertIsInstance(decoder, base_decoder.Decoder)

  @parameterized.parameters(
      ('flowers'),
      ('coco'),
  )
  # Renamed from `test_doesnt_exit_*`: "exit" was a typo for "exist".
  def test_doesnt_exist_classification_decoder(self, tfds_name):
    """Unsupported classification datasets raise `ValueError`."""
    with self.assertRaises(ValueError):
      _ = tfds_factory.get_classification_decoder(tfds_name)

  @parameterized.parameters(
      ('coco'),
      ('coco/2014'),
      ('coco/2017'),
  )
  def test_detection_decoder(self, tfds_name):
    """Supported detection datasets yield a `Decoder` instance."""
    decoder = tfds_factory.get_detection_decoder(tfds_name)
    self.assertIsInstance(decoder, base_decoder.Decoder)

  @parameterized.parameters(
      ('pascal'),
      ('cityscapes'),
  )
  def test_doesnt_exist_detection_decoder(self, tfds_name):
    """Unsupported detection datasets raise `ValueError`."""
    with self.assertRaises(ValueError):
      _ = tfds_factory.get_detection_decoder(tfds_name)

  @parameterized.parameters(
      ('cityscapes'),
      ('cityscapes/semantic_segmentation'),
      ('cityscapes/semantic_segmentation_extra'),
  )
  def test_segmentation_decoder(self, tfds_name):
    """Supported segmentation datasets yield a `Decoder` instance."""
    decoder = tfds_factory.get_segmentation_decoder(tfds_name)
    self.assertIsInstance(decoder, base_decoder.Decoder)

  @parameterized.parameters(
      ('coco'),
      ('imagenet'),
  )
  def test_doesnt_exist_segmentation_decoder(self, tfds_name):
    """Unsupported segmentation datasets raise `ValueError`."""
    with self.assertRaises(ValueError):
      _ = tfds_factory.get_segmentation_decoder(tfds_name)
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -143,3 +143,24 @@ def create_classification_example(
int64_list=tf.train.Int64List(value=labels))),
})).SerializeToString()
return serialized_example
def create_3d_image_test_example(image_height: int, image_width: int,
                                 image_volume: int,
                                 image_channel: int) -> tf.train.Example:
  """Creates a `tf.train.Example` with a random 3D image and binary label map.

  Args:
    image_height: Height of the volume.
    image_width: Width of the volume.
    image_volume: Depth (number of slices) of the volume.
    image_channel: Number of channels.

  Returns:
    A `tf.train.Example` whose `IMAGE_KEY` feature holds the raw float32
    image bytes and whose `CLASSIFICATION_LABEL_KEY` feature holds raw
    float32 label bytes with values in {0.0, 1.0}.
  """
  images = np.random.rand(image_height, image_width, image_volume,
                          image_channel)
  images = images.astype(np.float32)
  # Binary labels. Both bounds are spelled out: the previous
  # `np.random.randint(low=2, ...)` relied on numpy treating `low` as the
  # exclusive upper bound when `high` is None, which reads as "minimum 2".
  labels = np.random.randint(
      low=0, high=2,
      size=(image_height, image_width, image_volume, image_channel))
  labels = labels.astype(np.float32)
  feature = {
      IMAGE_KEY: (tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[images.tobytes()]))),
      CLASSIFICATION_LABEL_KEY: (tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[labels.tobytes()])))
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
......@@ -393,8 +393,10 @@ class SpineNet(tf.keras.Model):
block_spec.level))
if (block_spec.level < self._min_level or
block_spec.level > self._max_level):
raise ValueError('Output level is out of range [{}, {}]'.format(
self._min_level, self._max_level))
logging.warning(
'SpineNet output level out of range [min_level, max_level] = '
'[%s, %s] will not be used for further processing.',
self._min_level, self._max_level)
endpoints[str(block_spec.level)] = x
return endpoints
......
......@@ -152,6 +152,7 @@ class SpineNetMobile(tf.keras.Model):
use_sync_bn: bool = False,
norm_momentum: float = 0.99,
norm_epsilon: float = 0.001,
use_keras_upsampling_2d: bool = False,
**kwargs):
"""Initializes a Mobile SpineNet model.
......@@ -181,6 +182,7 @@ class SpineNetMobile(tf.keras.Model):
use_sync_bn: If True, use synchronized batch normalization.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A small `float` added to variance to avoid dividing by zero.
use_keras_upsampling_2d: If True, use keras UpSampling2D layer.
**kwargs: Additional keyword arguments to be passed.
"""
self._input_specs = input_specs
......@@ -200,12 +202,7 @@ class SpineNetMobile(tf.keras.Model):
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if activation == 'relu':
self._activation_fn = tf.nn.relu
elif activation == 'swish':
self._activation_fn = tf.nn.swish
else:
raise ValueError('Activation {} not implemented.'.format(activation))
self._use_keras_upsampling_2d = use_keras_upsampling_2d
self._num_init_blocks = 2
if use_sync_bn:
......@@ -271,7 +268,7 @@ class SpineNetMobile(tf.keras.Model):
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
inputs)
return tf.identity(x, name=name)
return tf.keras.layers.Activation('linear', name=name)(x)
def _build_stem(self, inputs):
"""Builds SpineNet stem."""
......@@ -290,7 +287,7 @@ class SpineNetMobile(tf.keras.Model):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation_fn)(x)
x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
net = []
stem_strides = [1, 2]
......@@ -365,14 +362,15 @@ class SpineNetMobile(tf.keras.Model):
parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = tf.add_n(parent_weights)
weights_sum = layers.Add()(parent_weights)
parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents))
]
# Fuse all parent nodes then build a new block.
x = tf_utils.get_activation(self._activation_fn)(tf.add_n(parents))
x = tf_utils.get_activation(
self._activation, use_keras_layer=True)(layers.Add()(parents))
x = self._block_group(
inputs=x,
in_filters=target_num_filters,
......@@ -421,7 +419,7 @@ class SpineNetMobile(tf.keras.Model):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation_fn)(x)
x = tf_utils.get_activation(self._activation, use_keras_layer=True)(x)
endpoints[str(level)] = x
return endpoints
......@@ -446,11 +444,13 @@ class SpineNetMobile(tf.keras.Model):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation_fn)(x)
x = tf_utils.get_activation(
self._activation, use_keras_layer=True)(x)
input_width /= 2
elif input_width < target_width:
scale = target_width // input_width
x = spatial_transform_ops.nearest_upsampling(x, scale=scale)
x = spatial_transform_ops.nearest_upsampling(
x, scale=scale, use_keras_layer=self._use_keras_upsampling_2d)
# Last 1x1 conv to match filter size.
x = layers.Conv2D(
......@@ -485,7 +485,8 @@ class SpineNetMobile(tf.keras.Model):
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
'norm_epsilon': self._norm_epsilon,
'use_keras_upsampling_2d': self._use_keras_upsampling_2d,
}
return config_dict
......@@ -531,4 +532,5 @@ def build_spinenet_mobile(
activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon)
norm_epsilon=norm_activation_config.norm_epsilon,
use_keras_upsampling_2d=backbone_cfg.use_keras_upsampling_2d)
......@@ -90,6 +90,7 @@ class SpineNetMobileTest(parameterized.TestCase, tf.test.TestCase):
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
use_keras_upsampling_2d=False,
)
network = spinenet_mobile.SpineNetMobile(**kwargs)
......
......@@ -24,17 +24,16 @@ from official.vision.beta.modeling.backbones import spinenet
class SpineNetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(128, 0.65, 1, 0.5, 128),
(256, 1.0, 1, 0.5, 256),
(384, 1.0, 2, 0.5, 256),
(512, 1.0, 3, 1.0, 256),
(640, 1.3, 4, 1.0, 384),
(128, 0.65, 1, 0.5, 128, 4, 6),
(256, 1.0, 1, 0.5, 256, 3, 6),
(384, 1.0, 2, 0.5, 256, 4, 7),
(512, 1.0, 3, 1.0, 256, 3, 7),
(640, 1.3, 4, 1.0, 384, 3, 7),
)
def test_network_creation(self, input_size, filter_size_scale, block_repeats,
resample_alpha, endpoints_num_filters):
resample_alpha, endpoints_num_filters, min_level,
max_level):
"""Test creation of SpineNet models."""
min_level = 3
max_level = 7
tf.keras.backend.set_image_data_format('channels_last')
......
......@@ -26,6 +26,10 @@ from official.modeling import tf_utils
States = Dict[str, tf.Tensor]
Activation = Union[str, Callable]
# TODO(dankondratyuk): keep legacy padding until new checkpoints are trained.
# Otherwise, accuracy will be affected.
LEGACY_PADDING = True
def make_divisible(value: float,
divisor: int,
......@@ -68,6 +72,23 @@ def round_filters(filters: int,
return int(new_filters)
def hard_swish(x: tf.Tensor) -> tf.Tensor:
  """Hard-swish (a.k.a. Swish6 / H-Swish) activation.

  Computes ``x * relu6(x + 3) / 6``.

  Reference: Section 5.2 of Howard et al. "Searching for MobileNet V3."
  https://arxiv.org/pdf/1905.02244.pdf

  Args:
    x: the input tensor.

  Returns:
    The activation output.
  """
  # Keep the original left-to-right evaluation order so float results match.
  gate = tf.nn.relu6(x + 3.)
  return x * gate * (1. / 6.)


# Make the activation resolvable by name, e.g. Activation('hard_swish').
tf.keras.utils.get_custom_objects().update({'hard_swish': hard_swish})
@tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitation(tf.keras.layers.Layer):
"""Creates a squeeze and excitation layer."""
......@@ -706,9 +727,10 @@ class CausalConvMixin:
self._use_buffered_input = variable
def _compute_buffered_causal_padding(self,
inputs: Optional[tf.Tensor] = None,
inputs: tf.Tensor,
use_buffered_input: bool = False,
time_axis: int = 1) -> List[List[int]]:
time_axis: int = 1,
) -> List[List[int]]:
"""Calculates padding for 'causal' option for conv layers.
Args:
......@@ -720,7 +742,7 @@ class CausalConvMixin:
Returns:
A list of paddings for `tf.pad`.
"""
del inputs
input_shape = tf.shape(inputs)[1:-1]
if tf.keras.backend.image_data_format() == 'channels_first':
raise ValueError('"channels_first" mode is unsupported.')
......@@ -730,7 +752,14 @@ class CausalConvMixin:
(self.kernel_size[i] - 1) * (self.dilation_rate[i] - 1))
for i in range(self.rank)
]
pad_total = [kernel_size_effective[i] - 1 for i in range(self.rank)]
if LEGACY_PADDING:
# Apply legacy padding that does not take into account spatial strides
pad_total = [kernel_size_effective[i] - 1 for i in range(self.rank)]
else:
pad_total = [kernel_size_effective[0] - 1]
for i in range(1, self.rank):
overlap = (input_shape[i] - 1) % self.strides[i] + 1
pad_total.append(tf.maximum(kernel_size_effective[i] - overlap, 0))
pad_beg = [pad_total[i] // 2 for i in range(self.rank)]
pad_end = [pad_total[i] - pad_beg[i] for i in range(self.rank)]
padding = [[pad_beg[i], pad_end[i]] for i in range(self.rank)]
......@@ -763,7 +792,8 @@ class CausalConvMixin:
# across time should be the input shape minus any padding, assuming
# the stride across time is 1.
if self._use_buffered_input and spatial_output_shape[0] is not None:
padding = self._compute_buffered_causal_padding(use_buffered_input=False)
padding = self._compute_buffered_causal_padding(
tf.zeros([1] + spatial_output_shape + [1]), use_buffered_input=False)
spatial_output_shape[0] -= sum(padding[1])
return spatial_output_shape
......@@ -911,15 +941,13 @@ class Conv3D(tf.keras.layers.Conv3D, CausalConvMixin):
base_config = super(Conv3D, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def build(self, input_shape):
"""Builds the layer with the given input shape."""
super(Conv3D, self).build(input_shape)
# TODO(b/177662019): tf.nn.conv3d with depthwise kernels on CPU
# in eager mode may produce incorrect output or cause a segfault.
# To avoid this issue, compile the op to TF graph using tf.function.
self._convolution_op = tf.function(
self._convolution_op, experimental_compile=True)
def call(self, inputs):
"""Call the layer with the given inputs."""
# Note: tf.nn.conv3d with depthwise kernels on CPU is currently only
# supported when compiling with TF graph (XLA) using tf.function, so it
# is compiled by default here (b/186463870).
conv_fn = tf.function(super(Conv3D, self).call, jit_compile=True)
return conv_fn(inputs)
def _compute_causal_padding(self, inputs):
"""Computes causal padding dimensions for the given inputs."""
......
......@@ -24,6 +24,15 @@ from official.vision.beta.modeling.layers import nn_layers
class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
  def setUp(self):
    super().setUp()
    # Exercise the new stride-aware causal padding path rather than the
    # legacy one (module-level flag read by the causal conv padding code).
    nn_layers.LEGACY_PADDING = False
  def test_hard_swish(self):
    """Checks hard_swish is registered with Keras and matches known values."""
    activation = tf.keras.layers.Activation('hard_swish')
    # hard_swish(x) = x * relu6(x + 3) / 6, e.g. -1.5 * 1.5 / 6 = -0.375.
    output = activation(tf.constant([-3, -1.5, 0, 3]))
    self.assertAllEqual(output, [0., -0.375, 0., 3.])
def test_scale(self):
scale = nn_layers.Scale(initializer=tf.keras.initializers.constant(10.))
output = scale(3.)
......@@ -274,14 +283,14 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
predicted = conv3d(padded_inputs)
expected = tf.constant(
[[[[[12., 12., 12.],
[[[[[27., 27., 27.],
[18., 18., 18.]],
[[18., 18., 18.],
[27., 27., 27.]]],
[[[24., 24., 24.],
[12., 12., 12.]]],
[[[54., 54., 54.],
[36., 36., 36.]],
[[36., 36., 36.],
[54., 54., 54.]]]]])
[24., 24., 24.]]]]])
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
......@@ -311,14 +320,17 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
predicted = conv3d(padded_inputs)
expected = tf.constant(
[[[[[4.0, 4.0, 4.0],
[[[[[9.0, 9.0, 9.0],
[6.0, 6.0, 6.0]],
[[6.0, 6.0, 6.0],
[9.0, 9.0, 9.0]]],
[[[8.0, 8.0, 8.0],
[4.0, 4.0, 4.0]]],
[[[18.0, 18.0, 18.0],
[12., 12., 12.]],
[[12., 12., 12.],
[18., 18., 18.]]]]])
[8., 8., 8.]]]]])
output_shape = conv3d._spatial_output_shape([4, 4, 4])
self.assertAllClose(output_shape, [2, 2, 2])
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
......@@ -329,5 +341,74 @@ class NNLayersTest(parameterized.TestCase, tf.test.TestCase):
self.assertEqual(predicted.shape, expected.shape)
self.assertAllClose(predicted, expected)
  def test_conv3d_causal_padding_2d(self):
    """Test to ensure causal padding works like standard padding.

    With a kernel of 1 along the time axis, 'causal' padding should be
    indistinguishable from Keras 'same' padding over the two spatial axes.
    Relies on `nn_layers.LEGACY_PADDING = False` set in `setUp`.
    """
    conv3d = nn_layers.Conv3D(
        filters=1,
        kernel_size=(1, 3, 3),
        strides=(1, 2, 2),
        padding='causal',
        use_buffered_input=False,
        kernel_initializer='ones',
        use_bias=False,
    )
    # Reference: stock Keras Conv3D with 'same' padding, same weights (ones).
    keras_conv3d = tf.keras.layers.Conv3D(
        filters=1,
        kernel_size=(1, 3, 3),
        strides=(1, 2, 2),
        padding='same',
        kernel_initializer='ones',
        use_bias=False,
    )
    inputs = tf.ones([1, 1, 4, 4, 1])
    predicted = conv3d(inputs)
    expected = keras_conv3d(inputs)
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)
    # Hand-computed sums of ones under a 3x3 all-ones kernel with stride 2.
    self.assertAllClose(predicted,
                        [[[[[9.],
                            [6.]],
                           [[6.],
                            [4.]]]]])
  def test_conv3d_causal_padding_1d(self):
    """Test to ensure causal padding works like standard padding.

    With a 1x1 spatial kernel, a causal Conv3D striding only over time should
    reproduce a causal Conv1D applied to the (squeezed) time dimension.
    """
    conv3d = nn_layers.Conv3D(
        filters=1,
        kernel_size=(3, 1, 1),
        strides=(2, 1, 1),
        padding='causal',
        use_buffered_input=False,
        kernel_initializer='ones',
        use_bias=False,
    )
    # Reference: Keras Conv1D with built-in 'causal' padding, same weights.
    keras_conv1d = tf.keras.layers.Conv1D(
        filters=1,
        kernel_size=3,
        strides=2,
        padding='causal',
        kernel_initializer='ones',
        use_bias=False,
    )
    inputs = tf.ones([1, 4, 1, 1, 1])
    predicted = conv3d(inputs)
    # Squeeze the singleton spatial dims so Conv1D sees [batch, time, chans].
    expected = keras_conv1d(tf.squeeze(inputs, axis=[2, 3]))
    expected = tf.reshape(expected, [1, 2, 1, 1, 1])
    self.assertEqual(predicted.shape, expected.shape)
    self.assertAllClose(predicted, expected)
    # First output sees only 1 real input (rest is causal zero padding).
    self.assertAllClose(predicted,
                        [[[[[1.]]],
                          [[[3.]]]]])
# Run the test suite when executed as a script.
if __name__ == '__main__':
  tf.test.main()
......@@ -16,7 +16,6 @@
from typing import Any, List, Mapping, Optional, Union
# Import libraries
import tensorflow as tf
from official.vision.beta.ops import anchor
......@@ -147,14 +146,18 @@ class MaskRCNNModel(tf.keras.Model):
model_outputs = {}
# Feature extraction.
features = self.backbone(images)
backbone_features = self.backbone(images)
if self.decoder:
features = self.decoder(features)
features = self.decoder(backbone_features)
else:
features = backbone_features
# Region proposal network.
rpn_scores, rpn_boxes = self.rpn_head(features)
model_outputs.update({
'backbone_features': backbone_features,
'decoder_features': features,
'rpn_boxes': rpn_boxes,
'rpn_scores': rpn_scores
})
......
......@@ -1205,7 +1205,8 @@ class RandAugment(ImageAugment):
self.magnitude = float(magnitude)
self.cutout_const = float(cutout_const)
self.translate_const = float(translate_const)
self.prob_to_apply = prob_to_apply
self.prob_to_apply = (
float(prob_to_apply) if prob_to_apply is not None else None)
self.available_ops = [
'AutoContrast', 'Equalize', 'Invert', 'Rotate', 'Posterize', 'Solarize',
'Color', 'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'ShearY',
......
......@@ -198,7 +198,8 @@ def multilevel_crop_and_resize(features,
# Assigns boxes to the right level.
box_width = boxes[:, :, 3] - boxes[:, :, 1]
box_height = boxes[:, :, 2] - boxes[:, :, 0]
areas_sqrt = tf.cast(tf.sqrt(box_height * box_width), tf.float32)
areas_sqrt = tf.sqrt(
tf.cast(box_height, tf.float32) * tf.cast(box_width, tf.float32))
levels = tf.cast(
tf.math.floordiv(
tf.math.log(tf.divide(areas_sqrt, 224.0)),
......@@ -456,6 +457,12 @@ def crop_mask_in_target_box(masks,
[batch_size, num_boxes, output_size, output_size].
"""
with tf.name_scope('crop_mask_in_target_box'):
# Cast to float32, as the y_transform and other transform variables may
# overflow in float16
masks = tf.cast(masks, tf.float32)
boxes = tf.cast(boxes, tf.float32)
target_boxes = tf.cast(target_boxes, tf.float32)
batch_size, num_masks, height, width = masks.get_shape().as_list()
if batch_size is None:
batch_size = tf.shape(masks)[0]
......@@ -504,18 +511,22 @@ def crop_mask_in_target_box(masks,
return cropped_masks
def nearest_upsampling(data, scale):
def nearest_upsampling(data, scale, use_keras_layer=False):
"""Nearest neighbor upsampling implementation.
Args:
data: A tensor with a shape of [batch, height_in, width_in, channels].
scale: An integer multiple to scale resolution of input data.
use_keras_layer: If True, use keras Upsampling2D layer.
Returns:
data_up: A tensor with a shape of
[batch, height_in*scale, width_in*scale, channels]. Same dtype as input
data.
"""
if use_keras_layer:
return tf.keras.layers.UpSampling2D(size=(scale, scale),
interpolation='nearest')(data)
with tf.name_scope('nearest_upsampling'):
bs, _, _, c = data.get_shape().as_list()
shape = tf.shape(input=data)
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example experiment configuration definition."""
from typing import List
import dataclasses
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
@dataclasses.dataclass
class ExampleDataConfig(cfg.DataConfig):
  """Input config for training. Add more fields as needed."""
  input_path: str = ''  # File pattern/path of the input data.
  global_batch_size: int = 0  # Batch size summed over all replicas.
  is_training: bool = True
  dtype: str = 'float32'
  shuffle_buffer_size: int = 10000
  cycle_length: int = 10
  file_type: str = 'tfrecord'  # Selects the dataset reader (see build_inputs).
@dataclasses.dataclass
class ExampleModel(hyperparams.Config):
  """The model config. Used by build_example_model function."""
  num_classes: int = 0  # Number of output classes.
  input_size: List[int] = dataclasses.field(default_factory=list)  # [h, w, c].
@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Loss config: weight of the L2 regularization term."""
  l2_weight_decay: float = 0.0
@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  """Evaluation config: k for top-k accuracy."""
  top_k: int = 5
@dataclasses.dataclass
class ExampleTask(cfg.TaskConfig):
  """The task config.

  Sub-configs use `default_factory` so each `ExampleTask` instance gets its
  own fresh config objects; a plain class-level default instance would be
  shared (and mutable) across all tasks.
  """
  model: ExampleModel = dataclasses.field(default_factory=ExampleModel)
  train_data: ExampleDataConfig = dataclasses.field(
      default_factory=lambda: ExampleDataConfig(is_training=True))
  validation_data: ExampleDataConfig = dataclasses.field(
      default_factory=lambda: ExampleDataConfig(is_training=False))
  losses: Losses = dataclasses.field(default_factory=Losses)
  evaluation: Evaluation = dataclasses.field(default_factory=Evaluation)
@exp_factory.register_config_factory('tf_vision_example_experiment')
def tf_vision_example_experiment() -> cfg.ExperimentConfig:
  """Definition of a full example experiment.

  Registers the experiment under the name 'tf_vision_example_experiment'.

  Returns:
    A `cfg.ExperimentConfig` wiring together the example task, a SGD +
    cosine-decay trainer, and basic data configs.
  """
  train_batch_size = 256
  eval_batch_size = 256
  steps_per_epoch = 10
  config = cfg.ExperimentConfig(
      task=ExampleTask(
          model=ExampleModel(num_classes=10, input_size=[128, 128, 3]),
          losses=Losses(l2_weight_decay=1e-4),
          train_data=ExampleDataConfig(
              input_path='/path/to/train*',
              is_training=True,
              global_batch_size=train_batch_size),
          validation_data=ExampleDataConfig(
              input_path='/path/to/valid*',
              is_training=False,
              global_batch_size=eval_batch_size)),
      trainer=cfg.TrainerConfig(
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          train_steps=90 * steps_per_epoch,
          validation_steps=steps_per_epoch,
          validation_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'sgd',
                  'sgd': {
                      'momentum': 0.9
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 1.6,
                      # NOTE(review): decay_steps spans 350 epochs while
                      # train_steps above is 90 epochs, so the cosine schedule
                      # never completes — confirm whether this is intentional.
                      'decay_steps': 350 * steps_per_epoch
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 5 * steps_per_epoch,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
task:
model:
num_classes: 1001
input_size: [128, 128, 3]
train_data:
input_path: 'imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 64
dtype: 'bfloat16'
validation_data:
input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 64
dtype: 'bfloat16'
drop_remainder: false
trainer:
train_steps: 62400
validation_steps: 13
validation_interval: 312
steps_per_loop: 312
summary_interval: 312
checkpoint_interval: 312
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'stepwise'
stepwise:
boundaries: [18750, 37500, 50000]
values: [0.1, 0.01, 0.001, 0.0001]
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
num_classes: 1001
input_size: [128, 128, 3]
train_data:
input_path: 'imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 4096
dtype: 'bfloat16'
validation_data:
input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 4096
dtype: 'bfloat16'
drop_remainder: false
trainer:
train_steps: 62400
validation_steps: 13
validation_interval: 312
steps_per_loop: 312
summary_interval: 312
checkpoint_interval: 312
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'stepwise'
stepwise:
boundaries: [18750, 37500, 50000]
values: [0.1, 0.01, 0.001, 0.0001]
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example classification decoder and parser.
This file defines the Decoder and Parser to load data. The example is shown on
loading standard tf.Example data but non-standard tf.Example or other data
format can be supported by implementing proper decoder and parser.
"""
from typing import Mapping, List, Tuple
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import decoder
from official.vision.beta.dataloaders import parser
from official.vision.beta.ops import preprocess_ops
MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)
STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
class Decoder(decoder.Decoder):
  """A tf.Example decoder for classification task."""

  def __init__(self):
    """Initializes the decoder.

    The constructor defines the mapping between the field name and the value
    from an input tf.Example. For example, we define two fields for image bytes
    and labels. There is no limit on the number of fields to decode.
    """
    # Defaults ('' / -1) are used when a serialized example lacks the field,
    # so missing labels are distinguishable from real class ids.
    self._keys_to_features = {
        'image/encoded':
            tf.io.FixedLenFeature((), tf.string, default_value=''),
        'image/class/label':
            tf.io.FixedLenFeature((), tf.int64, default_value=-1)
    }

  def decode(self,
             serialized_example: tf.train.Example) -> Mapping[str, tf.Tensor]:
    """Decodes a tf.Example to a dictionary.

    This function decodes a serialized tf.Example to a dictionary. The output
    will be consumed by `_parse_train_data` and `_parse_validation_data` in
    Parser.

    Args:
      serialized_example: A serialized tf.Example.

    Returns:
      A dictionary of field key name and decoded tensor mapping.
    """
    return tf.io.parse_single_example(
        serialized_example, self._keys_to_features)
class Parser(parser.Parser):
  """Parser to parse an image and its annotations.

  To define own Parser, client should override _parse_train_data and
  _parse_eval_data functions, where decoded tensors are parsed with optional
  pre-processing steps. The output from the two functions can be any structure
  like tuple, list or dictionary.
  """

  def __init__(self, output_size: List[int], num_classes: int):
    """Initializes parameters for parsing annotations in the dataset.

    This example only takes two arguments but one can freely add as many
    arguments as needed. For example, pre-processing and augmentations usually
    happen in Parser, and related parameters can be passed in by this
    constructor.

    Args:
      output_size: `list` of [height, width] of the output image. A plain
        list is required since `_parse_data` concatenates it with `[3]`.
      num_classes: `int`, number of classes.
    """
    self._output_size = output_size
    # NOTE(review): stored but not read by any method in this class;
    # presumably kept for subclasses or future one-hot encoding.
    self._num_classes = num_classes
    self._dtype = tf.float32

  def _parse_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Decodes, resizes and normalizes one image; shared by train/eval paths."""
    label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
    image_bytes = decoded_tensors['image/encoded']
    image = tf.io.decode_jpeg(image_bytes, channels=3)
    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    # Give the graph a static [height, width, 3] shape after resize.
    image = tf.ensure_shape(image, self._output_size + [3])
    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(
        image, offset=MEAN_RGB, scale=STDDEV_RGB)
    image = tf.image.convert_image_dtype(image, self._dtype)
    return image, label

  def _parse_train_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Parses data for training.

    Args:
      decoded_tensors: A dictionary of field key name and decoded tensor mapping
        from Decoder.

    Returns:
      A tuple of (image, label) tensors.
    """
    return self._parse_data(decoded_tensors)

  def _parse_eval_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    """Parses data for evaluation.

    Args:
      decoded_tensors: A dictionary of field key name and decoded tensor mapping
        from Decoder.

    Returns:
      A tuple of (image, label) tensors.
    """
    return self._parse_data(decoded_tensors)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A sample model implementation.
This is only a dummy example to showcase how a model is composed. It is usually
not needed to implement a modedl from scratch. Most SoTA models can be found and
directly used from `official/vision/beta/modeling` directory.
"""
from typing import Any, Mapping
# Import libraries
import tensorflow as tf
from official.vision.beta.projects.example import example_config as example_cfg
@tf.keras.utils.register_keras_serializable(package='Vision')
class ExampleModel(tf.keras.Model):
  """A example model class.

  A model is a subclass of tf.keras.Model where layers are built in the
  constructor.
  """

  def __init__(
      self,
      num_classes: int,
      input_specs: tf.keras.layers.InputSpec = tf.keras.layers.InputSpec(
          shape=[None, None, None, 3]),
      **kwargs):
    """Initializes the example model.

    All layers are defined in the constructor, and config is recorded in the
    `_config_dict` object for serialization.

    Args:
      num_classes: The number of classes in classification task.
      input_specs: A `tf.keras.layers.InputSpec` spec of the input tensor.
      **kwargs: Additional keyword arguments to be passed.
    """
    inputs = tf.keras.Input(shape=input_specs.shape[1:], name=input_specs.name)
    # Stack of stride-2 convolutions that progressively widen the channels.
    x = inputs
    for num_filters in (16, 32, 64):
      x = tf.keras.layers.Conv2D(
          filters=num_filters,
          kernel_size=3,
          strides=2,
          padding='same',
          use_bias=False)(x)
    # Classifier head: global pooling followed by two dense layers; the
    # final layer emits raw logits (no activation).
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(1024, activation='relu')(x)
    x = tf.keras.layers.Dense(num_classes)(x)
    super().__init__(inputs=inputs, outputs=x, **kwargs)
    self._input_specs = input_specs
    self._config_dict = {'num_classes': num_classes, 'input_specs': input_specs}

  def get_config(self) -> Mapping[str, Any]:
    """Gets the config of this model."""
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Constructs an instance of this model from input config."""
    return cls(**config)
def build_example_model(input_specs: tf.keras.layers.InputSpec,
                        model_config: example_cfg.ExampleModel,
                        **kwargs) -> tf.keras.Model:
  """Builds and returns the example model.

  This function is the main entry point to build a model. Commonly, it builds
  a model by assembling a backbone, decoder and head; an example of building a
  classification model lives under
  third_party/tensorflow_models/official/vision/beta/modeling/backbones/resnet.py.
  Not every model needs all three pieces — depending on the task a model can be
  as simple as this example or a more complex multi-head architecture.

  Args:
    input_specs: The specs of the input layer that defines input size.
    model_config: The config containing parameters to build a model.
    **kwargs: Additional keyword arguments to be passed.

  Returns:
    A tf.keras.Model object.
  """
  model = ExampleModel(
      num_classes=model_config.num_classes,
      input_specs=input_specs,
      **kwargs)
  return model
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example task definition for image classification."""
from typing import Any, List, Optional, Tuple, Sequence, Mapping
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import task_factory
from official.modeling import tf_utils
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.projects.example import example_config as exp_cfg
from official.vision.beta.projects.example import example_input
from official.vision.beta.projects.example import example_model
@task_factory.register_task_cls(exp_cfg.ExampleTask)
class ExampleTask(base_task.Task):
  """Class of an example task.

  A task is a subclass of base_task.Task that defines model, input, loss,
  metric and one training and evaluation step, etc.
  """

  def build_model(self) -> tf.keras.Model:
    """Builds and returns a model from the task's model config.

    Returns:
      A tf.keras.Model built by `example_model.build_example_model`.
    """
    # Leading `None` leaves the batch dimension unspecified.
    input_specs = tf.keras.layers.InputSpec(shape=[None] +
                                            self.task_config.model.input_size)
    model = example_model.build_example_model(
        input_specs=input_specs, model_config=self.task_config.model)
    return model

  def build_inputs(
      self,
      params: exp_cfg.ExampleDataConfig,
      input_context: Optional[tf.distribute.InputContext] = None
  ) -> tf.data.Dataset:
    """Builds input.

    The input from this function is a tf.data.Dataset that has gone through
    pre-processing steps, such as augmentation, batching, shuffling, etc.

    Args:
      params: The experiment config.
      input_context: An optional InputContext used by input reader.

    Returns:
      A tf.data.Dataset object.
    """
    num_classes = self.task_config.model.num_classes
    input_size = self.task_config.model.input_size
    # Decoder deserializes raw records; Parser converts decoded examples into
    # model-ready (features, labels) tensors of the configured spatial size.
    decoder = example_input.Decoder()
    parser = example_input.Parser(
        output_size=input_size[:2], num_classes=num_classes)
    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
        decoder_fn=decoder.decode,
        parser_fn=parser.parse_fn(params.is_training))
    dataset = reader.read(input_context=input_context)
    return dataset

  def build_losses(self,
                   labels: tf.Tensor,
                   model_outputs: tf.Tensor,
                   aux_losses: Optional[Any] = None) -> tf.Tensor:
    """Builds losses for training and validation.

    Args:
      labels: Input groundtruth labels.
      model_outputs: Output logits of the model.
      aux_losses: The auxiliary loss tensors, i.e. `losses` in tf.keras.Model.

    Returns:
      The total (scalar) loss tensor.
    """
    # Sparse categorical cross-entropy on logits, averaged over the batch.
    total_loss = tf.keras.losses.sparse_categorical_crossentropy(
        labels, model_outputs, from_logits=True)
    total_loss = tf_utils.safe_mean(total_loss)
    if aux_losses:
      # Include model-internal losses (e.g. weight regularization), if any.
      total_loss += tf.add_n(aux_losses)
    return total_loss

  def build_metrics(self,
                    training: bool = True) -> Sequence[tf.keras.metrics.Metric]:
    """Gets streaming metrics for training/validation.

    This function builds and returns a list of metrics to compute during
    training and validation. The list contains objects of subclasses of
    tf.keras.metrics.Metric. Training and validation can have different
    metrics (here they share the same ones).

    Args:
      training: Whether the metric is for training or not.

    Returns:
      A list of tf.keras.metrics.Metric objects.
    """
    # `top_k` comes from the evaluation section of the task config.
    k = self.task_config.evaluation.top_k
    metrics = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.SparseTopKCategoricalAccuracy(
            k=k, name='top_{}_accuracy'.format(k))
    ]
    return metrics

  def train_step(self,
                 inputs: Tuple[Any, Any],
                 model: tf.keras.Model,
                 optimizer: tf.keras.optimizers.Optimizer,
                 metrics: Optional[List[Any]] = None) -> Mapping[str, Any]:
    """Does forward and backward.

    This example assumes input is a tuple of (features, labels), which follows
    the output from data loader, i.e., Parser. The output from Parser is fed
    into train_step to perform one step forward and backward pass. Other data
    structure, such as dictionary, can also be used, as long as it is
    consistent between output from Parser and input used here.

    Args:
      inputs: A tuple of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      optimizer: The optimizer for this training step.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    with tf.GradientTape() as tape:
      outputs = model(features, training=True)
      # Casting output layer as float32 is necessary when mixed_precision is
      # mixed_float16 or mixed_bfloat16 to ensure output is casted as float32.
      outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
      # Computes per-replica loss.
      loss = self.build_losses(
          model_outputs=outputs, labels=labels, aux_losses=model.losses)
      # Scales loss as the default gradients allreduce performs sum inside the
      # optimizer.
      scaled_loss = loss / num_replicas
      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)
    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient before apply_gradients when LossScaleOptimizer is
    # used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))
    # `self.loss` is the canonical loss key expected by the training loop.
    logs = {self.loss: loss}
    if metrics:
      self.process_metrics(metrics, labels, outputs)
    return logs

  def validation_step(self,
                      inputs: Tuple[Any, Any],
                      model: tf.keras.Model,
                      metrics: Optional[List[Any]] = None) -> Mapping[str, Any]:
    """Runs validation step.

    Args:
      inputs: A tuple of input tensors of (features, labels).
      model: A tf.keras.Model instance.
      metrics: A nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs
    outputs = self.inference_step(features, model)
    # Cast to float32 so loss/metrics are computed at full precision even
    # under a mixed-precision policy.
    outputs = tf.nest.map_structure(lambda x: tf.cast(x, tf.float32), outputs)
    loss = self.build_losses(
        model_outputs=outputs, labels=labels, aux_losses=model.losses)
    logs = {self.loss: loss}
    if metrics:
      self.process_metrics(metrics, labels, outputs)
    return logs

  def inference_step(self, inputs: tf.Tensor, model: tf.keras.Model) -> Any:
    """Performs the forward step (training=False). Used in validation_step."""
    return model(inputs, training=False)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""All necessary imports for registration.
Custom models, task, configs, etc need to be imported to registry so they can be
picked up by the trainer. They can be included in this file so you do not need
to handle each file separately.
"""
# pylint: disable=unused-import
from official.common import registry_imports
from official.vision.beta.projects.example import example_config
from official.vision.beta.projects.example import example_input
from official.vision.beta.projects.example import example_model
from official.vision.beta.projects.example import example_task
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TensorFlow Model Garden Vision trainer.
All custom registry are imported from registry_imports. Here we use default
trainer so we directly call train.main. If you need to customize the trainer,
branch from `official/vision/beta/train.py` and make changes.
"""
from absl import app
from official.common import flags as tfm_flags
from official.vision.beta import train
from official.vision.beta.projects.example import registry_imports # pylint: disable=unused-import
# Entry point: define the standard TF Model Garden flags, then hand control to
# the default vision trainer's `main` via absl's app runner.
if __name__ == '__main__':
  tfm_flags.define_flags()
  app.run(train.main)
......@@ -44,6 +44,13 @@ class Movinet(hyperparams.Config):
# 2plus1d: (2+1)D convolution with Conv2D (2D reshaping)
# 3d_2plus1d: (2+1)D convolution with Conv3D (no 2D reshaping)
conv_type: str = '3d'
# Choose from ['3d', '2d', '2plus3d']
# 3d: default 3D global average pooling.
# 2d: 2D global average pooling.
# 2plus3d: concatenation of 2D and 3D global average pooling.
se_type: str = '3d'
activation: str = 'swish'
gating_activation: str = 'sigmoid'
stochastic_depth_drop_rate: float = 0.2
use_external_states: bool = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment