Commit 31ca3b97 authored by Kaushik Shivakumar

resolve merge conflicts

parents 3e9d886d 7fcd7cba
...@@ -15,11 +15,13 @@ tensorflow-addons
dataclasses
gin-config
tf_slim>=1.1.0
Cython
matplotlib
pyyaml
# CV related dependencies
opencv-python-headless
Pillow
-e git+https://github.com/cocodataset/cocoapi#egg=pycocotools&subdirectory=PythonAPI
# NLP related dependencies
seqeval
sentencepiece
...@@ -48,6 +48,22 @@ so the checkpoints are not compatible.
We will unify the implementation soon.
### Train a SpineNet-49 based RetinaNet.
```bash
TPU_NAME="<your GCP TPU name>"
MODEL_DIR="<path to the directory to store model files>"
TRAIN_FILE_PATTERN="<path to the TFRecord training data>"
EVAL_FILE_PATTERN="<path to the TFRecord validation data>"
VAL_JSON_FILE="<path to the validation annotation JSON file>"
python3 ~/models/official/vision/detection/main.py \
--strategy_type=tpu \
--tpu="${TPU_NAME?}" \
--model_dir="${MODEL_DIR?}" \
--mode=train \
--params_override="{ type: retinanet, architecture: {backbone: spinenet, multilevel_features: identity}, spinenet: {model_id: 49}, train: { train_file_pattern: ${TRAIN_FILE_PATTERN?} }, eval: { val_json_file: ${VAL_JSON_FILE?}, eval_file_pattern: ${EVAL_FILE_PATTERN?} } }"
```
### Train a custom RetinaNet using the config file.
...@@ -163,6 +179,24 @@ so the checkpoints are not compatible.
We will unify the implementation soon.
### Train a SpineNet-49 based Mask R-CNN.
```bash
TPU_NAME="<your GCP TPU name>"
MODEL_DIR="<path to the directory to store model files>"
TRAIN_FILE_PATTERN="<path to the TFRecord training data>"
EVAL_FILE_PATTERN="<path to the TFRecord validation data>"
VAL_JSON_FILE="<path to the validation annotation JSON file>"
python3 ~/models/official/vision/detection/main.py \
--strategy_type=tpu \
--tpu="${TPU_NAME?}" \
--model_dir="${MODEL_DIR?}" \
--mode=train \
--model=mask_rcnn \
--params_override="{architecture: {backbone: spinenet, multilevel_features: identity}, spinenet: {model_id: 49}, train: { train_file_pattern: ${TRAIN_FILE_PATTERN?} }, eval: { val_json_file: ${VAL_JSON_FILE?}, eval_file_pattern: ${EVAL_FILE_PATTERN?} } }"
```
### Train a custom Mask R-CNN using the config file.
First, create a YAML config file, e.g. *my_maskrcnn.yaml*.
...
...@@ -17,10 +17,12 @@
BACKBONES = [
    'resnet',
    'spinenet',
]

MULTILEVEL_FEATURES = [
    'fpn',
    'identity',
]

# pylint: disable=line-too-long
...@@ -118,6 +120,9 @@ BASE_CFG = {
    'resnet': {
        'resnet_depth': 50,
    },
    'spinenet': {
        'model_id': '49',
    },
    'fpn': {
        'fpn_feat_dims': 256,
        'use_separable_conv': False,
...
...@@ -23,6 +23,7 @@ from official.vision.detection.modeling.architecture import heads
from official.vision.detection.modeling.architecture import identity
from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.modeling.architecture import resnet
from official.vision.detection.modeling.architecture import spinenet


def norm_activation_generator(params):
...@@ -42,6 +43,9 @@ def backbone_generator(params):
        activation=params.norm_activation.activation,
        norm_activation=norm_activation_generator(
            params.norm_activation))
  elif params.architecture.backbone == 'spinenet':
    spinenet_params = params.spinenet
    backbone_fn = spinenet.SpineNetBuilder(model_id=spinenet_params.model_id)
  else:
    raise ValueError('Backbone model `{}` is not supported.'
                     .format(params.architecture.backbone))
...
...@@ -28,7 +28,7 @@ import functools
import tensorflow as tf

from official.vision.detection.modeling.architecture import keras_utils
from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.ops import spatial_transform_ops
...@@ -120,7 +120,7 @@ class Fpn(object):
          'The minimum backbone level %d should be ' % (min(input_levels)) +
          'less or equal to FPN minimum level %d.' % (self._min_level))
    backbone_max_level = min(max(input_levels), self._max_level)
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope('fpn'):
      # Adds lateral connections.
      feats_lateral = {}
      for level in range(self._min_level, backbone_max_level + 1):
...
...@@ -22,7 +22,8 @@ import functools
import numpy as np
import tensorflow as tf

from official.vision.detection.modeling.architecture import keras_utils
from official.vision.detection.modeling.architecture import nn_ops
from official.vision.detection.ops import spatial_transform_ops
...@@ -127,7 +128,7 @@ class RpnHead(tf.keras.layers.Layer):
    scores_outputs = {}
    box_outputs = {}
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope('rpn_head'):
      for level in range(self._min_level, self._max_level + 1):
        scores_output, box_output = self._shared_rpn_heads(
            features[level], self._anchors_per_location, level, is_training)
...@@ -249,7 +250,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
        predictions.
    """
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope(
        'fast_rcnn_head'):
      # Reshape inputs before FC.
      _, num_rois, height, width, filters = roi_features.get_shape().as_list()
...@@ -368,7 +370,7 @@ class MaskrcnnHead(tf.keras.layers.Layer):
        boxes is not 4.
    """
    with keras_utils.maybe_enter_backend_graph():
      with tf.name_scope('mask_head'):
        _, num_rois, height, width, filters = roi_features.get_shape().as_list()
        net = tf.reshape(roi_features, [-1, height, width, filters])
...@@ -552,7 +554,8 @@ class RetinanetHead(object):
    """Returns outputs of RetinaNet head."""
    class_outputs = {}
    box_outputs = {}
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope(
        'retinanet_head'):
      for level in range(self._min_level, self._max_level + 1):
        features = fpn_features[level]
...@@ -644,7 +647,7 @@ class ShapemaskPriorHead(object):
      detection_priors: A float Tensor of shape [batch_size * num_instances,
        mask_size, mask_size, 1].
    """
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope('prior_mask'):
      batch_size, num_instances, _ = boxes.get_shape().as_list()
      outer_boxes = tf.cast(outer_boxes, tf.float32)
      boxes = tf.cast(boxes, tf.float32)
...@@ -807,7 +810,7 @@ class ShapemaskCoarsemaskHead(object):
      mask_outputs: instance mask prediction as a float Tensor of shape
        [batch_size, num_instances, mask_size, mask_size].
    """
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope('coarse_mask'):
      # Transform detection priors to have the same dimension as features.
      detection_priors = tf.expand_dims(detection_priors, axis=-1)
      detection_priors = self._coarse_mask_fc(detection_priors)
...@@ -939,7 +942,7 @@ class ShapemaskFinemaskHead(object):
    """
    # Extract the foreground mean features.
    # with tf.variable_scope('fine_mask', reuse=tf.AUTO_REUSE):
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope('fine_mask'):
      mask_probs = tf.nn.sigmoid(mask_logits)
      # Compute instance embedding for hard average.
      binary_mask = tf.cast(tf.greater(mask_probs, 0.5), features.dtype)
...
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Util functions to integrate with Keras internals."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.keras import backend
try:
  # KerasTensors are only available in newer TF 2.x builds; their presence
  # changes how Keras functional models are built.
  from tensorflow.python.keras.engine import keras_tensor  # pylint: disable=g-import-not-at-top,unused-import
except ImportError:
  keras_tensor = None


class NoOpContextManager(object):
  """A context manager that does nothing on enter or exit."""

  def __enter__(self):
    pass

  def __exit__(self, *args):
    pass


def maybe_enter_backend_graph():
  """Returns a context manager for Keras model-building code.

  When KerasTensors are enabled, entering the TF1-style backend graph is
  unnecessary, so a no-op context is returned; otherwise this falls back to
  `backend.get_graph().as_default()`.
  """
  if (keras_tensor is not None) and keras_tensor.keras_tensors_enabled():
    return NoOpContextManager()
  else:
    return backend.get_graph().as_default()
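A minimal usage sketch for this helper (this mirrors the pattern the heads and backbones below follow; `conv_head` and `features` are illustrative names, not part of the codebase):

```python
import tensorflow as tf

from official.vision.detection.modeling.architecture import keras_utils


def conv_head(features):
  # No-op under KerasTensors; otherwise enters the Keras backend graph.
  with keras_utils.maybe_enter_backend_graph(), tf.name_scope('example_head'):
    return tf.keras.layers.Conv2D(256, 3, padding='same')(features)
```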
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common building blocks for neural networks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.modeling import tf_utils
@tf.keras.utils.register_keras_serializable(package='Vision')
class ResidualBlock(tf.keras.layers.Layer):
"""A residual block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A residual block with BN after convolutions.
Args:
filters: `int` number of filters for the two convolutions in the block.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(ResidualBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=1,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(ResidualBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(ResidualBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
return self._activation_fn(x + shortcut)
@tf.keras.utils.register_keras_serializable(package='Vision')
class BottleneckBlock(tf.keras.layers.Layer):
"""A standard bottleneck block."""
def __init__(self,
filters,
strides,
use_projection=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""A standard bottleneck block with BN after convolutions.
Args:
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
kernel_initializer: kernel_initializer for convolutional layers.
kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
Default to None.
bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
Default to None.
activation: `str` name of the activation function.
use_sync_bn: if True, use synchronized batch normalization.
norm_momentum: `float` normalization momentum for the moving average.
norm_epsilon: `float` small float added to variance to avoid dividing by
zero.
**kwargs: keyword arguments to be passed.
"""
super(BottleneckBlock, self).__init__(**kwargs)
self._filters = filters
self._strides = strides
self._use_projection = use_projection
self._use_sync_bn = use_sync_bn
self._activation = activation
self._kernel_initializer = kernel_initializer
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
if use_sync_bn:
self._norm = tf.keras.layers.experimental.SyncBatchNormalization
else:
self._norm = tf.keras.layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
self._activation_fn = tf_utils.get_activation(activation)
def build(self, input_shape):
if self._use_projection:
self._shortcut = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=self._strides,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm0 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv1 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm1 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv2 = tf.keras.layers.Conv2D(
filters=self._filters,
kernel_size=3,
strides=self._strides,
padding='same',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm2 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
self._conv3 = tf.keras.layers.Conv2D(
filters=self._filters * 4,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)
self._norm3 = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)
super(BottleneckBlock, self).build(input_shape)
def get_config(self):
config = {
'filters': self._filters,
'strides': self._strides,
'use_projection': self._use_projection,
'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer,
'activation': self._activation,
'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum,
'norm_epsilon': self._norm_epsilon
}
base_config = super(BottleneckBlock, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs):
shortcut = inputs
if self._use_projection:
shortcut = self._shortcut(shortcut)
shortcut = self._norm0(shortcut)
x = self._conv1(inputs)
x = self._norm1(x)
x = self._activation_fn(x)
x = self._conv2(x)
x = self._norm2(x)
x = self._activation_fn(x)
x = self._conv3(x)
x = self._norm3(x)
return self._activation_fn(x + shortcut)
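For a quick sanity check of the blocks above, a hedged usage sketch (the input shape is arbitrary):

```python
import tensorflow as tf

from official.vision.detection.modeling.architecture import nn_blocks

inputs = tf.keras.Input(shape=(64, 64, 64))
# Stride 2 plus the 4x channel expansion means a projection shortcut is needed.
outputs = nn_blocks.BottleneckBlock(
    filters=64, strides=2, use_projection=True)(inputs)
model = tf.keras.Model(inputs, outputs)  # Output shape: (None, 32, 32, 256).
```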
...@@ -25,7 +25,7 @@ from __future__ import print_function
from absl import logging
import tensorflow as tf

from official.vision.detection.modeling.architecture import keras_utils
from official.vision.detection.modeling.architecture import nn_ops

# TODO(b/140112644): Refactor the code with Keras style, i.e. build and call.
...@@ -90,7 +90,7 @@ class Resnet(object):
      The values are corresponding feature hierarchy in ResNet with shape
      [batch_size, height_l, width_l, num_filters].
    """
    with keras_utils.maybe_enter_backend_graph():
      with tf.name_scope('resnet%s' % self._resnet_depth):
        return self._resnet_fn(inputs, is_training)
...
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of SpineNet model.
X. Du, T-Y. Lin, P. Jin, G. Ghiasi, M. Tan, Y. Cui, Q. V. Le, X. Song
SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization
https://arxiv.org/abs/1912.05027
"""
import math
from absl import logging
import tensorflow as tf
from official.modeling import tf_utils
from official.vision.detection.modeling.architecture import keras_utils
from official.vision.detection.modeling.architecture import nn_blocks
layers = tf.keras.layers
FILTER_SIZE_MAP = {
1: 32,
2: 64,
3: 128,
4: 256,
5: 256,
6: 256,
7: 256,
}
# The fixed SpineNet architecture discovered by NAS.
# Each element represents a specification of a building block:
# (block_level, block_fn, (input_offset0, input_offset1), is_output).
SPINENET_BLOCK_SPECS = [
(2, 'bottleneck', (0, 1), False),
(4, 'residual', (0, 1), False),
(3, 'bottleneck', (2, 3), False),
(4, 'bottleneck', (2, 4), False),
(6, 'residual', (3, 5), False),
(4, 'bottleneck', (3, 5), False),
(5, 'residual', (6, 7), False),
(7, 'residual', (6, 8), False),
(5, 'bottleneck', (8, 9), False),
(5, 'bottleneck', (8, 10), False),
(4, 'bottleneck', (5, 10), True),
(3, 'bottleneck', (4, 10), True),
(5, 'bottleneck', (7, 12), True),
(7, 'bottleneck', (5, 14), True),
(6, 'bottleneck', (12, 14), True),
]
SCALING_MAP = {
'49S': {
'endpoints_num_filters': 128,
'filter_size_scale': 0.65,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'49': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 1,
},
'96': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 0.5,
'block_repeats': 2,
},
'143': {
'endpoints_num_filters': 256,
'filter_size_scale': 1.0,
'resample_alpha': 1.0,
'block_repeats': 3,
},
'190': {
'endpoints_num_filters': 512,
'filter_size_scale': 1.3,
'resample_alpha': 1.0,
'block_repeats': 4,
},
}
class BlockSpec(object):
"""A container class that specifies the block configuration for SpineNet."""
def __init__(self, level, block_fn, input_offsets, is_output):
self.level = level
self.block_fn = block_fn
self.input_offsets = input_offsets
self.is_output = is_output
def build_block_specs(block_specs=None):
"""Builds the list of BlockSpec objects for SpineNet."""
if not block_specs:
block_specs = SPINENET_BLOCK_SPECS
logging.info('Building SpineNet block specs: %s', block_specs)
return [BlockSpec(*b) for b in block_specs]
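

# For illustration (not used by the model): the first entry of
# SPINENET_BLOCK_SPECS, (2, 'bottleneck', (0, 1), False), becomes a BlockSpec
# with level=2, block_fn='bottleneck', input_offsets=(0, 1), is_output=False.
# The offsets index previously built blocks, so (0, 1) selects the two stem
# blocks as this block's inputs.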
@tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNet(tf.keras.Model):
"""Class to build SpineNet models."""
def __init__(self,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
endpoints_num_filters=256,
resample_alpha=0.5,
block_repeats=1,
filter_size_scale=1.0,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001,
**kwargs):
"""SpineNet model."""
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = endpoints_num_filters
self._resample_alpha = resample_alpha
self._block_repeats = block_repeats
self._filter_size_scale = filter_size_scale
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
if activation == 'relu':
self._activation = tf.nn.relu
elif activation == 'swish':
self._activation = tf.nn.swish
else:
raise ValueError('Activation {} not implemented.'.format(activation))
self._init_block_fn = 'bottleneck'
self._num_init_blocks = 2
if use_sync_bn:
self._norm = layers.experimental.SyncBatchNormalization
else:
self._norm = layers.BatchNormalization
if tf.keras.backend.image_data_format() == 'channels_last':
self._bn_axis = -1
else:
self._bn_axis = 1
# Build SpineNet.
inputs = tf.keras.Input(shape=input_specs.shape[1:])
net = self._build_stem(inputs=inputs)
net = self._build_scale_permuted_network(
net=net, input_width=input_specs.shape[1])
net = self._build_endpoints(net=net)
super(SpineNet, self).__init__(inputs=inputs, outputs=net)
def _block_group(self,
inputs,
filters,
strides,
block_fn_cand,
block_repeats=1,
name='block_group'):
"""Creates one group of blocks for the SpineNet model."""
block_fn_candidates = {
'bottleneck': nn_blocks.BottleneckBlock,
'residual': nn_blocks.ResidualBlock,
}
block_fn = block_fn_candidates[block_fn_cand]
_, _, _, num_filters = inputs.get_shape().as_list()
if block_fn_cand == 'bottleneck':
use_projection = not (num_filters == (filters * 4) and strides == 1)
else:
use_projection = not (num_filters == filters and strides == 1)
x = block_fn(
filters=filters,
strides=strides,
use_projection=use_projection,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
inputs)
for _ in range(1, block_repeats):
x = block_fn(
filters=filters,
strides=1,
use_projection=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)(
x)
return tf.identity(x, name=name)
def _build_stem(self, inputs):
"""Build SpineNet stem."""
x = layers.Conv2D(
filters=64,
kernel_size=7,
strides=2,
use_bias=False,
padding='same',
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
net = []
# Build the initial level 2 blocks.
for i in range(self._num_init_blocks):
x = self._block_group(
inputs=x,
filters=int(FILTER_SIZE_MAP[2] * self._filter_size_scale),
strides=1,
block_fn_cand=self._init_block_fn,
block_repeats=self._block_repeats,
name='stem_block_{}'.format(i + 1))
net.append(x)
return net
def _build_scale_permuted_network(self,
net,
input_width,
weighted_fusion=False):
"""Build scale-permuted network."""
net_sizes = [int(math.ceil(input_width / 2**2))] * len(net)
net_block_fns = [self._init_block_fn] * len(net)
num_outgoing_connections = [0] * len(net)
endpoints = {}
for i, block_spec in enumerate(self._block_specs):
# Find out specs for the target block.
target_width = int(math.ceil(input_width / 2**block_spec.level))
target_num_filters = int(FILTER_SIZE_MAP[block_spec.level] *
self._filter_size_scale)
target_block_fn = block_spec.block_fn
# Resample then merge input0 and input1.
parents = []
input0 = block_spec.input_offsets[0]
input1 = block_spec.input_offsets[1]
x0 = self._resample_with_alpha(
inputs=net[input0],
input_width=net_sizes[input0],
input_block_fn=net_block_fns[input0],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x0)
num_outgoing_connections[input0] += 1
x1 = self._resample_with_alpha(
inputs=net[input1],
input_width=net_sizes[input1],
input_block_fn=net_block_fns[input1],
target_width=target_width,
target_num_filters=target_num_filters,
target_block_fn=target_block_fn,
alpha=self._resample_alpha)
parents.append(x1)
num_outgoing_connections[input1] += 1
# Merge 0 outdegree blocks to the output block.
if block_spec.is_output:
for j, (j_feat,
j_connections) in enumerate(zip(net, num_outgoing_connections)):
if j_connections == 0 and (j_feat.shape[2] == target_width and
j_feat.shape[3] == x0.shape[3]):
parents.append(j_feat)
num_outgoing_connections[j] += 1
# pylint: disable=g-direct-tensorflow-import
if weighted_fusion:
dtype = parents[0].dtype
parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = tf.add_n(parent_weights)
parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents))
]
# Fuse all parent nodes then build a new block.
x = tf_utils.get_activation(self._activation)(tf.add_n(parents))
x = self._block_group(
inputs=x,
filters=target_num_filters,
strides=1,
block_fn_cand=target_block_fn,
block_repeats=self._block_repeats,
name='scale_permuted_block_{}'.format(i + 1))
net.append(x)
net_sizes.append(target_width)
net_block_fns.append(target_block_fn)
num_outgoing_connections.append(0)
# Save output feats.
if block_spec.is_output:
if block_spec.level in endpoints:
raise ValueError('Duplicate feats found for output level {}.'.format(
block_spec.level))
if (block_spec.level < self._min_level or
block_spec.level > self._max_level):
raise ValueError('Output level is out of range [{}, {}]'.format(
self._min_level, self._max_level))
endpoints[block_spec.level] = x
return endpoints
def _build_endpoints(self, net):
"""Match filter size for endpoints before sharing conv layers."""
endpoints = {}
for level in range(self._min_level, self._max_level + 1):
x = layers.Conv2D(
filters=self._endpoints_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
net[level])
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
endpoints[level] = x
return endpoints
def _resample_with_alpha(self,
inputs,
input_width,
input_block_fn,
target_width,
target_num_filters,
target_block_fn,
alpha=0.5):
"""Match resolution and feature dimension."""
_, _, _, input_num_filters = inputs.get_shape().as_list()
if input_block_fn == 'bottleneck':
input_num_filters /= 4
new_num_filters = int(input_num_filters * alpha)
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
inputs)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
# Spatial resampling.
if input_width > target_width:
x = layers.Conv2D(
filters=new_num_filters,
kernel_size=3,
strides=2,
padding='SAME',
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
x = tf_utils.get_activation(self._activation)(x)
input_width /= 2
while input_width > target_width:
x = layers.MaxPool2D(pool_size=3, strides=2, padding='SAME')(x)
input_width /= 2
elif input_width < target_width:
scale = target_width // input_width
x = layers.UpSampling2D(size=(scale, scale))(x)
# Last 1x1 conv to match filter size.
if target_block_fn == 'bottleneck':
target_num_filters *= 4
x = layers.Conv2D(
filters=target_num_filters,
kernel_size=1,
strides=1,
use_bias=False,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer)(
x)
x = self._norm(
axis=self._bn_axis,
momentum=self._norm_momentum,
epsilon=self._norm_epsilon)(
x)
return x
class SpineNetBuilder(object):
"""SpineNet builder."""
def __init__(self,
model_id,
input_specs=tf.keras.layers.InputSpec(shape=[None, 640, 640, 3]),
min_level=3,
max_level=7,
block_specs=build_block_specs(),
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
use_sync_bn=False,
norm_momentum=0.99,
norm_epsilon=0.001):
if model_id not in SCALING_MAP:
raise ValueError(
'SpineNet {} is not a valid architecture.'.format(model_id))
scaling_params = SCALING_MAP[model_id]
self._input_specs = input_specs
self._min_level = min_level
self._max_level = max_level
self._block_specs = block_specs
self._endpoints_num_filters = scaling_params['endpoints_num_filters']
self._resample_alpha = scaling_params['resample_alpha']
self._block_repeats = scaling_params['block_repeats']
self._filter_size_scale = scaling_params['filter_size_scale']
self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer
self._activation = activation
self._use_sync_bn = use_sync_bn
self._norm_momentum = norm_momentum
self._norm_epsilon = norm_epsilon
def __call__(self, inputs, is_training=None):
with keras_utils.maybe_enter_backend_graph():
model = SpineNet(
input_specs=self._input_specs,
min_level=self._min_level,
max_level=self._max_level,
block_specs=self._block_specs,
endpoints_num_filters=self._endpoints_num_filters,
resample_alpha=self._resample_alpha,
block_repeats=self._block_repeats,
filter_size_scale=self._filter_size_scale,
kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer,
activation=self._activation,
use_sync_bn=self._use_sync_bn,
norm_momentum=self._norm_momentum,
norm_epsilon=self._norm_epsilon)
return model(inputs)
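A hedged sketch of using the builder standalone, mirroring how factory.py wires it up (the input tensor here is illustrative):

```python
import tensorflow as tf

from official.vision.detection.modeling.architecture import spinenet

builder = spinenet.SpineNetBuilder(model_id='49')
images = tf.keras.Input(shape=(640, 640, 3))
# Returns a dict of endpoint features keyed by level (3 through 7 by default).
endpoints = builder(images)
```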
...@@ -20,13 +20,13 @@ from __future__ import print_function
import tensorflow as tf

from official.vision.detection.dataloader import anchor
from official.vision.detection.dataloader import mode_keys
from official.vision.detection.evaluation import factory as eval_factory
from official.vision.detection.modeling import base_model
from official.vision.detection.modeling import losses
from official.vision.detection.modeling.architecture import factory
from official.vision.detection.modeling.architecture import keras_utils
from official.vision.detection.ops import postprocess_ops
from official.vision.detection.ops import roi_ops
from official.vision.detection.ops import spatial_transform_ops
...@@ -297,7 +297,7 @@ class MaskrcnnModel(base_model.Model):
  def build_model(self, params, mode):
    if self._keras_model is None:
      input_layers = self.build_input_layers(self._params, mode)
      with keras_utils.maybe_enter_backend_graph():
        outputs = self.model_outputs(input_layers, mode)
        model = tf.keras.models.Model(
...
...@@ -20,12 +20,12 @@ from __future__ import print_function
import tensorflow as tf

from official.vision.detection.dataloader import mode_keys
from official.vision.detection.evaluation import factory as eval_factory
from official.vision.detection.modeling import base_model
from official.vision.detection.modeling import losses
from official.vision.detection.modeling.architecture import factory
from official.vision.detection.modeling.architecture import keras_utils
from official.vision.detection.ops import postprocess_ops
...@@ -57,7 +57,7 @@ class RetinanetModel(base_model.Model):
        params.postprocess)
    self._transpose_input = params.train.transpose_input
    assert not self._transpose_input, 'Transpose input is not supported.'

    # Input layer.
    input_shape = (
        params.retinanet_parser.output_size +
...@@ -120,7 +120,7 @@ class RetinanetModel(base_model.Model):
  def build_model(self, params, mode=None):
    if self._keras_model is None:
      with keras_utils.maybe_enter_backend_graph():
        outputs = self.model_outputs(self._input_layer, mode)
        model = tf.keras.models.Model(
...
...@@ -20,13 +20,13 @@ from __future__ import print_function
import tensorflow as tf

from official.vision.detection.dataloader import anchor
from official.vision.detection.dataloader import mode_keys
from official.vision.detection.evaluation import factory as eval_factory
from official.vision.detection.modeling import base_model
from official.vision.detection.modeling import losses
from official.vision.detection.modeling.architecture import factory
from official.vision.detection.modeling.architecture import keras_utils
from official.vision.detection.ops import postprocess_ops
from official.vision.detection.utils import box_utils
...@@ -265,7 +265,7 @@ class ShapeMaskModel(base_model.Model):
  def build_model(self, params, mode):
    if self._keras_model is None:
      input_layers = self.build_input_layers(self._params, mode)
      with keras_utils.maybe_enter_backend_graph():
        outputs = self.model_outputs(input_layers, mode)
        model = tf.keras.models.Model(
...
...@@ -339,7 +339,8 @@ def train_and_eval(
    optimizer = optimizer_factory.build_optimizer(
        optimizer_name=params.model.optimizer.name,
        base_learning_rate=learning_rate,
        params=params.model.optimizer.as_dict(),
        model=model)

    metrics_map = _get_metrics(one_hot)
    metrics = [metrics_map[metric] for metric in params.train.metrics]
...
...@@ -18,11 +18,12 @@ from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function

from typing import Any, Dict, Text, List

from absl import logging
import tensorflow as tf
import tensorflow_addons as tfa

from official.vision.image_classification import learning_rate
from official.vision.image_classification.configs import base_configs
...@@ -250,7 +251,8 @@ class MovingAverage(tf.keras.optimizers.Optimizer):
def build_optimizer(
    optimizer_name: Text,
    base_learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule,
    params: Dict[Text, Any],
    model: tf.keras.Model = None):
  """Build the optimizer based on name.

  Args:
...@@ -261,6 +263,8 @@ def build_optimizer(
    params: String -> Any dictionary representing the optimizer params.
      This should contain optimizer specific parameters such as
      `base_learning_rate`, `decay`, etc.
    model: The `tf.keras.Model`. This is used for the shadow copy if using
      `MovingAverage`.

  Returns:
    A tf.keras.Optimizer.
...@@ -322,10 +326,13 @@ def build_optimizer(
  # Moving average should be applied last, as it's applied at test time.
  moving_average_decay = params.get('moving_average_decay', 0.)
  if moving_average_decay is not None and moving_average_decay > 0.:
    if model is None:
      raise ValueError('`model` must be provided if using `MovingAverage`.')
    logging.info('Including moving average decay.')
    optimizer = MovingAverage(
        optimizer=optimizer,
        average_decay=moving_average_decay)
    optimizer.shadow_copy(model)
  return optimizer
...
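Given the updated signature, a hedged sketch of a call that exercises the `MovingAverage` path (the parameter keys and values here are illustrative, not canonical):

```python
import tensorflow as tf

from official.vision.image_classification import optimizer_factory

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
optimizer = optimizer_factory.build_optimizer(
    optimizer_name='momentum',
    base_learning_rate=0.01,
    params={'momentum': 0.9, 'moving_average_decay': 0.999},
    model=model)  # Required so MovingAverage can shadow-copy the weights.
```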
...@@ -19,15 +19,21 @@ from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function

from absl.testing import parameterized
import tensorflow as tf

from official.vision.image_classification import optimizer_factory
from official.vision.image_classification.configs import base_configs


class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):

  def build_toy_model(self) -> tf.keras.Model:
    """Creates a toy `tf.keras.Model`."""
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(1, input_shape=(1,)))
    return model

  @parameterized.named_parameters(
      ('sgd', 'sgd', 0., False),
      ('momentum', 'momentum', 0., False),
...@@ -40,6 +46,7 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
      ('rmsprop_ema', 'rmsprop', 0.999, False))
  def test_optimizer(self, optimizer_name, moving_average_decay, lookahead):
    """Smoke test to be sure no syntax errors."""
    model = self.build_toy_model()
    params = {
        'learning_rate': 0.001,
        'rho': 0.09,
...@@ -51,7 +58,8 @@ class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
    optimizer = optimizer_factory.build_optimizer(
        optimizer_name=optimizer_name,
        base_learning_rate=params['learning_rate'],
        params=params,
        model=model)
    self.assertTrue(issubclass(type(optimizer), tf.keras.optimizers.Optimizer))

  def test_unknown_optimizer(self):
...
...@@ -255,7 +255,7 @@ def define_keras_flags(
      name='tpu', default='', help='TPU address to connect to.')
  flags.DEFINE_integer(
      name='steps_per_loop',
      default=None,
      help='Number of steps per training loop. Only training step happens '
      'inside the loop. Callbacks will not be called inside. Will be capped at '
      'steps per epoch.')
...
...@@ -14,18 +14,16 @@
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset using custom training loops."""

import math
import os

from absl import app
from absl import flags
from absl import logging
import orbit
import tensorflow as tf

from official.modeling import performance
from official.utils.flags import core as flags_core
from official.utils.misc import distribution_utils
from official.utils.misc import keras_utils
...@@ -87,15 +85,6 @@ def get_num_train_iterations(flags_obj):
  return train_steps, train_epochs, eval_steps


def run(flags_obj):
  """Run ResNet ImageNet training and eval loop using custom training loops.
...@@ -121,7 +110,6 @@ def run(flags_obj):
      datasets_num_private_threads=flags_obj.datasets_num_private_threads)
  common.set_cudnn_batchnorm_mode()

  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first' if tf.config.list_physical_devices('GPU')
...@@ -137,7 +125,14 @@ def run(flags_obj):
  per_epoch_steps, train_epochs, eval_steps = get_num_train_iterations(
      flags_obj)
  if flags_obj.steps_per_loop is None:
    steps_per_loop = per_epoch_steps
  elif flags_obj.steps_per_loop > per_epoch_steps:
    steps_per_loop = per_epoch_steps
    logging.warn('Setting steps_per_loop to %d to respect epoch boundary.',
                 steps_per_loop)
  else:
    steps_per_loop = flags_obj.steps_per_loop

  logging.info(
      'Training %d epochs, each epoch has %d steps, '
...@@ -154,8 +149,8 @@ def run(flags_obj):
  eval_interval = flags_obj.epochs_between_evals * per_epoch_steps
  checkpoint_interval = (
      steps_per_loop * 5 if flags_obj.enable_checkpoint_and_export else None)
  summary_interval = steps_per_loop if flags_obj.enable_tensorboard else None

  checkpoint_manager = tf.train.CheckpointManager(
      runnable.checkpoint,
...@@ -164,20 +159,24 @@ def run(flags_obj):
      step_counter=runnable.global_step,
      checkpoint_interval=checkpoint_interval)

  resnet_controller = orbit.Controller(
      strategy,
      runnable,
      runnable if not flags_obj.skip_eval else None,
      global_step=runnable.global_step,
      steps_per_loop=steps_per_loop,
      checkpoint_manager=checkpoint_manager,
      summary_interval=summary_interval,
      eval_summary_dir=os.path.join(flags_obj.model_dir, 'eval'))

  time_callback.on_train_begin()
  if not flags_obj.skip_eval:
    resnet_controller.train_and_evaluate(
        train_steps=per_epoch_steps * train_epochs,
        eval_steps=eval_steps,
        eval_interval=eval_interval)
  else:
    resnet_controller.train(steps=per_epoch_steps * train_epochs)
  time_callback.on_train_end()

  stats = build_stats(runnable, time_callback)
...
...
@@ -14,33 +14,21 @@
 # ==============================================================================
 """Runs a ResNet model on the ImageNet dataset using custom training loops."""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+import orbit
 import tensorflow as tf

 from official.modeling import performance
 from official.staging.training import grad_utils
-from official.staging.training import standard_runnable
-from official.staging.training import utils
 from official.utils.flags import core as flags_core
 from official.vision.image_classification.resnet import common
 from official.vision.image_classification.resnet import imagenet_preprocessing
 from official.vision.image_classification.resnet import resnet_model


-class ResnetRunnable(standard_runnable.StandardTrainable,
-                     standard_runnable.StandardEvaluable):
+class ResnetRunnable(orbit.StandardTrainer, orbit.StandardEvaluator):
   """Implements the training and evaluation APIs for Resnet model."""

   def __init__(self, flags_obj, time_callback, epoch_steps):
-    standard_runnable.StandardTrainable.__init__(self,
-                                                 flags_obj.use_tf_while_loop,
-                                                 flags_obj.use_tf_function)
-    standard_runnable.StandardEvaluable.__init__(self,
-                                                 flags_obj.use_tf_function)
     self.strategy = tf.distribute.get_strategy()
     self.flags_obj = flags_obj
     self.dtype = flags_core.get_tf_dtype(flags_obj)
...
@@ -107,11 +95,8 @@ class ResnetRunnable(standard_runnable.StandardTrainable,
     # Handling epochs.
     self.epoch_steps = epoch_steps
-    self.epoch_helper = utils.EpochHelper(epoch_steps, self.global_step)
-
-  def build_train_dataset(self):
-    """See base class."""
-    return utils.make_distributed_dataset(
+    self.epoch_helper = orbit.utils.EpochHelper(epoch_steps, self.global_step)
+    train_dataset = orbit.utils.make_distributed_dataset(
         self.strategy,
         self.input_fn,
         is_training=True,
...
@@ -122,17 +107,20 @@ class ResnetRunnable(standard_runnable.StandardTrainable,
             .datasets_num_private_threads,
         dtype=self.dtype,
         drop_remainder=True)
-
-  def build_eval_dataset(self):
-    """See base class."""
-    return utils.make_distributed_dataset(
-        self.strategy,
-        self.input_fn,
-        is_training=False,
-        data_dir=self.flags_obj.data_dir,
-        batch_size=self.batch_size,
-        parse_record_fn=imagenet_preprocessing.parse_record,
-        dtype=self.dtype)
+    orbit.StandardTrainer.__init__(self, train_dataset,
+                                   flags_obj.use_tf_while_loop,
+                                   flags_obj.use_tf_function)
+    if not flags_obj.skip_eval:
+      eval_dataset = orbit.utils.make_distributed_dataset(
+          self.strategy,
+          self.input_fn,
+          is_training=False,
+          data_dir=self.flags_obj.data_dir,
+          batch_size=self.batch_size,
+          parse_record_fn=imagenet_preprocessing.parse_record,
+          dtype=self.dtype)
+      orbit.StandardEvaluator.__init__(self, eval_dataset,
+                                       flags_obj.use_tf_function)

   def train_loop_begin(self):
     """See base class."""
...
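`ResnetRunnable` now inherits from `orbit.StandardTrainer` and `orbit.StandardEvaluator` instead of the staging `standard_runnable` classes, so the datasets that the removed `build_train_dataset()`/`build_eval_dataset()` hooks built lazily are constructed eagerly in `__init__` and handed to the base-class constructors. A minimal sketch of the same pattern with a hypothetical toy dataset and model follows; the extra `use_tf_while_loop`/`use_tf_function` constructor arguments seen in the diff are omitted here because their spelling varies across orbit versions.

```python
import orbit
import tensorflow as tf


class ToyRunnable(orbit.StandardTrainer):
  """Hypothetical StandardTrainer fitting y = w * x; illustration only."""

  def __init__(self):
    self.global_step = tf.Variable(0, dtype=tf.int64, trainable=False)
    self.w = tf.Variable(0.0)
    self.optimizer = tf.keras.optimizers.SGD(0.1)
    _ = self.optimizer.iterations  # create optimizer state before tracing
    # As in the diff: build the dataset up front and pass it to the base class.
    dataset = tf.data.Dataset.from_tensors((3.0, 6.0)).repeat()
    orbit.StandardTrainer.__init__(self, dataset)

  def train_step(self, iterator):
    # Runs inside the loop that StandardTrainer wraps in a tf.function.
    x, y = next(iterator)
    with tf.GradientTape() as tape:
      loss = (self.w * x - y) ** 2
    grads = tape.gradient(loss, [self.w])
    self.optimizer.apply_gradients(zip(grads, [self.w]))
    self.global_step.assign_add(1)

  def train_loop_end(self):
    # Called eagerly after each inner loop; the returned dict gets logged.
    return {'w': self.w.numpy()}


runnable = ToyRunnable()
ctrl = orbit.Controller(
    trainer=runnable,
    global_step=runnable.global_step,
    steps_per_loop=10)
ctrl.train(steps=50)
```

Moving dataset construction into `__init__` is also what enables the `skip_eval` guard above: when evaluation is disabled, the eval dataset is never built and `StandardEvaluator.__init__` is never run.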
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
\ No newline at end of file