"vscode:/vscode.git/clone" did not exist on "124020f824f67e2cbadf38374d96934d7bdcc738"
Commit a32ffa95 authored by qianyj's avatar qianyj
Browse files

Update TensorFlow 2.x test method

parent e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base model configuration for CNN benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import namedtuple
import tensorflow.compat.v1 as tf
import convnet_builder
import mlperf
from tensorflow.python.ops import variables as variables_module # pylint: disable=g-direct-tensorflow-import
# BuildNetworkResult bundles everything produced by a Model.build_network()
# call: the classification logits plus any model-specific payload.
BuildNetworkResult = namedtuple('BuildNetworkResult', [
    'logits',      # output logits of the network
    'extra_info',  # model-specific extra information (may be None)
])
class Model(object):
  """Base model configuration shared by all DNN benchmarks."""

  def __init__(self,
               model_name,
               batch_size,
               learning_rate,
               fp16_loss_scale,
               params=None):
    self.model_name = model_name
    self.batch_size = batch_size
    # Remember the construction-time batch size; set_batch_size() may change
    # self.batch_size later.
    self.default_batch_size = batch_size
    self.learning_rate = learning_rate
    # TODO(reedwm) Set custom loss scales for each model instead of using the
    # default of 128.
    self.fp16_loss_scale = fp16_loss_scale
    # use_tf_layers specifies whether to build the model using tf.layers.
    # fp16_vars specifies whether to create the variables in float16.
    self.use_tf_layers = params.use_tf_layers if params else True
    self.fp16_vars = params.fp16_vars if params else False
    use_fp16 = bool(params) and params.use_fp16
    self.data_type = tf.float16 if use_fp16 else tf.float32

  def get_model_name(self):
    return self.model_name

  def get_batch_size(self):
    return self.batch_size

  def set_batch_size(self, batch_size):
    self.batch_size = batch_size

  def get_default_batch_size(self):
    return self.default_batch_size

  def get_fp16_loss_scale(self):
    return self.fp16_loss_scale

  def filter_l2_loss_vars(self, variables):
    """Filters out variables that the L2 loss should not be computed for.

    By default this drops batch-normalization variables and keeps everything
    else. Subclasses may override this behavior.

    Args:
      variables: A list of the trainable variables.

    Returns:
      A list of variables that the L2 loss should be computed for.
    """
    mlperf.logger.log(key=mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2, value=True)
    return [var for var in variables if 'batchnorm' not in var.name]

  def get_learning_rate(self, global_step, batch_size):
    # The base implementation ignores both arguments and returns the constant
    # configured learning rate; subclasses implement schedules.
    del global_step, batch_size
    return self.learning_rate

  def get_input_shapes(self, subset):
    """Returns the list of expected shapes of all the inputs to this model."""
    del subset
    raise NotImplementedError('Must be implemented in derived classes')

  def get_input_data_types(self, subset):
    """Returns the list of data types of all the inputs to this model."""
    del subset
    raise NotImplementedError('Must be implemented in derived classes')

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns the ops to generate synthetic inputs."""
    raise NotImplementedError('Must be implemented in derived classes')

  def build_network(self, inputs, phase_train, nclass):
    """Builds the forward pass of the model.

    Args:
      inputs: The list of inputs, including labels
      phase_train: True during training. False during evaluation.
      nclass: Number of classes that the inputs can belong to.

    Returns:
      A BuildNetworkResult which contains the logits and model-specific extra
      information.
    """
    raise NotImplementedError('Must be implemented in derived classes')

  def loss_function(self, inputs, build_network_result):
    """Returns the op to measure the loss of the model.

    Args:
      inputs: the input list of the model.
      build_network_result: a BuildNetworkResult returned by build_network().

    Returns:
      The loss tensor of the model.
    """
    raise NotImplementedError('Must be implemented in derived classes')

  # TODO(laigd): have accuracy_function() take build_network_result instead.
  def accuracy_function(self, inputs, logits):
    """Returns the ops to measure the accuracy of the model."""
    raise NotImplementedError('Must be implemented in derived classes')

  def postprocess(self, results):
    """Postprocess results returned from model in Python."""
    return results

  def reached_target(self):
    """Define custom methods to stop training when model's target is reached."""
    return False
class CNNModel(Model):
  """Base model configuration for CNN benchmarks.

  Adds image-specific configuration (input size, channel depth, data format)
  on top of Model and implements the shared forward-pass, loss and accuracy
  logic for image-classification CNNs.
  """

  # TODO(laigd): reduce the number of parameters and read everything from
  # params.
  def __init__(self,
               model,
               image_size,
               batch_size,
               learning_rate,
               layer_counts=None,
               fp16_loss_scale=128,
               params=None):
    super(CNNModel, self).__init__(
        model, batch_size, learning_rate, fp16_loss_scale,
        params=params)
    self.image_size = image_size
    self.layer_counts = layer_counts
    # Number of input channels; the benchmarks use RGB images.
    self.depth = 3
    self.params = params
    self.data_format = params.data_format if params else 'NCHW'

  def get_layer_counts(self):
    return self.layer_counts

  def skip_final_affine_layer(self):
    """Returns if the caller of this class should skip the final affine layer.

    Normally, this class adds a final affine layer to the model after calling
    self.add_inference(), to generate the logits. If a subclass override this
    method to return True, the caller should not add the final affine layer.

    This is useful for tests.
    """
    return False

  def add_backbone_saver(self):
    """Creates a tf.train.Saver as self.backbone_saver for loading backbone.

    A tf.train.Saver must be created and saved in self.backbone_saver before
    calling load_backbone_model, with correct variable name mapping to load
    variables from checkpoint correctly into the current model.
    """
    # Bug fix: there is no getName() method on this class hierarchy (the base
    # class defines get_model_name()), so the previous code raised
    # AttributeError instead of the intended NotImplementedError.
    raise NotImplementedError(self.get_model_name() +
                              ' does not have backbone model.')

  def load_backbone_model(self, sess, backbone_model_path):
    """Loads variable values from a pre-trained backbone model.

    This should be used at the beginning of the training process for transfer
    learning models using checkpoints of base models.

    Args:
      sess: session to train the model.
      backbone_model_path: path to backbone model checkpoint file.
    """
    del sess, backbone_model_path
    # Bug fix: see add_backbone_saver; getName() does not exist.
    raise NotImplementedError(self.get_model_name() +
                              ' does not have backbone model.')

  def add_inference(self, cnn):
    """Adds the core layers of the CNN's forward pass.

    This should build the forward pass layers, except for the initial transpose
    of the images and the final Dense layer producing the logits. The layers
    should be build with the ConvNetBuilder `cnn`, so that when this function
    returns, `cnn.top_layer` and `cnn.top_size` refer to the last layer and the
    number of units of the last layer, respectively.

    Args:
      cnn: A ConvNetBuilder to build the forward pass layers with.
    """
    del cnn
    raise NotImplementedError('Must be implemented in derived classes')

  def get_input_data_types(self, subset):
    """Return data types of inputs for the specified subset."""
    del subset  # Same types for both 'train' and 'validation' subsets.
    return [self.data_type, tf.int32]

  def get_input_shapes(self, subset):
    """Return data shapes of inputs for the specified subset."""
    del subset  # Same shapes for both 'train' and 'validation' subsets.
    # Each input is of shape [batch_size, height, width, depth]
    # Each label is of shape [batch_size]
    return [[self.batch_size, self.image_size, self.image_size, self.depth],
            [self.batch_size]]

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns ops generating random in-memory images and labels."""
    # Synthetic input should be within [0, 255].
    image_shape, label_shape = self.get_input_shapes('train')
    inputs = tf.truncated_normal(
        image_shape,
        dtype=self.data_type,
        mean=127,
        stddev=60,
        name=self.model_name + '_synthetic_inputs')
    # Hold the images in a non-trainable local variable so the random values
    # are materialized once rather than regenerated every step.
    inputs = variables_module.VariableV1(
        inputs, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES],
        name=input_name)
    labels = tf.random_uniform(
        label_shape,
        minval=0,
        maxval=nclass - 1,
        dtype=tf.int32,
        name=self.model_name + '_synthetic_labels')
    return (inputs, labels)

  def gpu_preprocess_nhwc(self, images, phase_train=True):
    # Identity by default; subclasses may override to preprocess on the GPU.
    del phase_train
    return images

  def build_network(self,
                    inputs,
                    phase_train=True,
                    nclass=1001):
    """Returns logits from input images.

    Args:
      inputs: The input images and labels
      phase_train: True during training. False during evaluation.
      nclass: Number of classes that the images can belong to.

    Returns:
      A BuildNetworkResult which contains the logits and model-specific extra
      information.
    """
    images = inputs[0]
    images = self.gpu_preprocess_nhwc(images, phase_train)
    if self.data_format == 'NCHW':
      # Inputs arrive in NHWC; transpose once up front for NCHW models.
      images = tf.transpose(images, [0, 3, 1, 2])
    var_type = tf.float32
    if self.data_type == tf.float16 and self.fp16_vars:
      var_type = tf.float16
    network = convnet_builder.ConvNetBuilder(
        images, self.depth, phase_train, self.use_tf_layers, self.data_format,
        self.data_type, var_type)
    with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
      self.add_inference(network)
      # Add the final fully-connected class layer
      logits = (
          network.affine(nclass, activation='linear')
          if not self.skip_final_affine_layer() else network.top_layer)
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_FINAL_SHAPE,
                        value=logits.shape.as_list()[1:])
      aux_logits = None
      if network.aux_top_layer is not None:
        with network.switch_to_aux_top_layer():
          aux_logits = network.affine(nclass, activation='linear', stddev=0.001)
    if self.data_type == tf.float16:
      # TODO(reedwm): Determine if we should do this cast here.
      logits = tf.cast(logits, tf.float32)
      if aux_logits is not None:
        aux_logits = tf.cast(aux_logits, tf.float32)
    return BuildNetworkResult(
        logits=logits, extra_info=None if aux_logits is None else aux_logits)

  def loss_function(self, inputs, build_network_result):
    """Returns the op to measure the loss of the model."""
    logits = build_network_result.logits
    _, labels = inputs
    # TODO(laigd): consider putting the aux logit in the Inception model,
    # which could call super.loss_function twice, once with the normal logits
    # and once with the aux logits.
    aux_logits = build_network_result.extra_info
    with tf.name_scope('xentropy'):
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_LOSS_FN, value=mlperf.tags.CCE)
      cross_entropy = tf.losses.sparse_softmax_cross_entropy(
          logits=logits, labels=labels)
      loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    if aux_logits is not None:
      with tf.name_scope('aux_xentropy'):
        aux_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            logits=aux_logits, labels=labels)
        # The auxiliary head contributes with weight 0.4.
        aux_loss = 0.4 * tf.reduce_mean(aux_cross_entropy, name='aux_loss')
        loss = tf.add_n([loss, aux_loss])
    return loss

  def accuracy_function(self, inputs, logits):
    """Returns the ops to measure the accuracy of the model."""
    _, labels = inputs
    # Sum of examples whose true label is within the top-1 / top-5
    # predictions; callers normalize by batch size to obtain accuracy.
    top_1_op = tf.reduce_sum(
        tf.cast(tf.nn.in_top_k(logits, labels, 1), self.data_type))
    top_5_op = tf.reduce_sum(
        tf.cast(tf.nn.in_top_k(logits, labels, 5), self.data_type))
    return {'top_1_accuracy': top_1_op, 'top_5_accuracy': top_5_op}
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Model configurations for CNN benchmarks.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from functools import partial
from models import alexnet_model
from models import densenet_model
from models import googlenet_model
from models import inception_model
from models import lenet_model
from models import official_resnet_model
from models import overfeat_model
from models import resnet_model
from models import trivial_model
from models import vgg_model
from models.experimental import deepspeech
from models.experimental import official_ncf_model
# Maps model name to the callable (class or factory function) that builds it.
# Used for the 'imagenet' and 'synthetic' datasets.
_model_name_to_imagenet_model = {
    'vgg11': vgg_model.Vgg11Model,
    'vgg16': vgg_model.Vgg16Model,
    'vgg19': vgg_model.Vgg19Model,
    'lenet': lenet_model.Lenet5Model,
    'googlenet': googlenet_model.GooglenetModel,
    'overfeat': overfeat_model.OverfeatModel,
    'alexnet': alexnet_model.AlexnetModel,
    'trivial': trivial_model.TrivialModel,
    'inception3': inception_model.Inceptionv3Model,
    'inception4': inception_model.Inceptionv4Model,
    # The 'official_resnet*' entries use partial() to pre-bind the resnet
    # size (and, for the v1 variants, version=1) on ImagenetResnetModel.
    'official_resnet18_v2':
        partial(official_resnet_model.ImagenetResnetModel, 18),
    'official_resnet34_v2':
        partial(official_resnet_model.ImagenetResnetModel, 34),
    'official_resnet50_v2':
        partial(official_resnet_model.ImagenetResnetModel, 50),
    'official_resnet101_v2':
        partial(official_resnet_model.ImagenetResnetModel, 101),
    'official_resnet152_v2':
        partial(official_resnet_model.ImagenetResnetModel, 152),
    'official_resnet200_v2':
        partial(official_resnet_model.ImagenetResnetModel, 200),
    'official_resnet18':
        partial(official_resnet_model.ImagenetResnetModel, 18, version=1),
    'official_resnet34':
        partial(official_resnet_model.ImagenetResnetModel, 34, version=1),
    'official_resnet50':
        partial(official_resnet_model.ImagenetResnetModel, 50, version=1),
    'official_resnet101':
        partial(official_resnet_model.ImagenetResnetModel, 101, version=1),
    'official_resnet152':
        partial(official_resnet_model.ImagenetResnetModel, 152, version=1),
    'official_resnet200':
        partial(official_resnet_model.ImagenetResnetModel, 200, version=1),
    'resnet50': resnet_model.create_resnet50_model,
    'resnet50_v1.5': resnet_model.create_resnet50_v1_5_model,
    'resnet50_v2': resnet_model.create_resnet50_v2_model,
    'resnet101': resnet_model.create_resnet101_model,
    'resnet101_v2': resnet_model.create_resnet101_v2_model,
    'resnet152': resnet_model.create_resnet152_model,
    'resnet152_v2': resnet_model.create_resnet152_v2_model,
    'ncf': official_ncf_model.NcfModel,
}
# Maps model name to its builder for the 'cifar10' dataset.
_model_name_to_cifar_model = {
    'alexnet': alexnet_model.AlexnetCifar10Model,
    'resnet20': resnet_model.create_resnet20_cifar_model,
    'resnet20_v2': resnet_model.create_resnet20_v2_cifar_model,
    'resnet32': resnet_model.create_resnet32_cifar_model,
    'resnet32_v2': resnet_model.create_resnet32_v2_cifar_model,
    'resnet44': resnet_model.create_resnet44_cifar_model,
    'resnet44_v2': resnet_model.create_resnet44_v2_cifar_model,
    'resnet56': resnet_model.create_resnet56_cifar_model,
    'resnet56_v2': resnet_model.create_resnet56_v2_cifar_model,
    'resnet110': resnet_model.create_resnet110_cifar_model,
    'resnet110_v2': resnet_model.create_resnet110_v2_cifar_model,
    'trivial': trivial_model.TrivialCifar10Model,
    'densenet40_k12': densenet_model.create_densenet40_k12_model,
    'densenet100_k12': densenet_model.create_densenet100_k12_model,
    'densenet100_k24': densenet_model.create_densenet100_k24_model,
}
# Maps model name to its builder for the 'coco' dataset. The real SSD model
# is TF1-only and is added later by register_tf1_models().
_model_name_to_object_detection_model = {
    'trivial': trivial_model.TrivialSSD300Model,
}
def _get_model_map(dataset_name):
"""Get name to model map for specified dataset."""
if dataset_name == 'cifar10':
return _model_name_to_cifar_model
elif dataset_name in ('imagenet', 'synthetic'):
return _model_name_to_imagenet_model
elif dataset_name == 'librispeech':
return {'deepspeech2': deepspeech.DeepSpeech2Model}
elif dataset_name == 'coco':
return _model_name_to_object_detection_model
else:
raise ValueError('Invalid dataset name: %s' % dataset_name)
# A model map dict can have this string as a value when TF2 is used, to indicate
# the model is only available in TF1. get_model_config() raises an error when it
# encounters this sentinel.
_TF1_ONLY_STRING = 'TF1_ONLY'
def get_model_config(model_name, dataset, params):
  """Map model name to model network configuration.

  Args:
    model_name: name of the model to instantiate.
    dataset: the dataset to run on; dataset.name selects the model map.
    params: BenchmarkCNN params, forwarded to the model constructor.

  Returns:
    The instantiated model.

  Raises:
    ValueError: if the model name is unknown for the dataset, or the model is
      only available in TensorFlow 1.
  """
  model_map = _get_model_map(dataset.name)
  if model_name not in model_map:
    raise ValueError('Invalid model name \'%s\' for dataset \'%s\'' %
                     (model_name, dataset.name))
  model_or_sentinel = model_map[model_name]
  # Bug fix: check for the TF1-only sentinel *before* instantiating. The
  # sentinel is a plain string, so the previous code's call
  # model_map[model_name](params=params) raised a TypeError before the
  # intended ValueError below could ever fire. Also compare against the
  # module-level _TF1_ONLY_STRING instead of repeating the literal.
  if model_or_sentinel == _TF1_ONLY_STRING:
    raise ValueError('Model \'%s\' can only be used with TensorFlow 1'
                     % (model_name,))
  return model_or_sentinel(params=params)
def register_model(model_name, dataset_name, model_func):
  """Register a new model that can be obtained with `get_model_config`."""
  name_to_model = _get_model_map(dataset_name)
  if model_name in name_to_model:
    raise ValueError('Model "%s" is already registered for dataset "%s"' %
                     (model_name, dataset_name))
  name_to_model[model_name] = model_func
# pylint: disable=g-import-not-at-top
# Probe for tf.contrib, which only exists in TensorFlow 1.x. The flag is used
# by register_tf1_models() to decide whether TF1-only models can be built.
try:
  from tensorflow.contrib import slim  # pylint: disable=unused-import
  can_import_contrib = True
except ImportError:
  can_import_contrib = False
def register_tf1_models():
  """Registers all the TensorFlow 1-only models.

  TF 1-only models use contrib, which was removed in TF 2. If contrib can be
  imported, the TF 1-only models are registered normally. If contrib cannot be
  imported, the models are registered with the _TF1_ONLY_STRING sentinel
  instead, which will cause an error to be thrown if these models are used.
  """
  if can_import_contrib:
    # pylint: disable=g-import-not-at-top
    from models.tf1_only import mobilenet_v2
    from models.tf1_only import nasnet_model
    from models.tf1_only import ssd_model
    register_model('mobilenet', 'imagenet', mobilenet_v2.MobilenetModel)
    register_model('nasnet', 'imagenet', nasnet_model.NasnetModel)
    register_model('nasnetlarge', 'imagenet', nasnet_model.NasnetLargeModel)
    register_model('nasnet', 'cifar10', nasnet_model.NasnetCifarModel)
    register_model('ssd300', 'coco', ssd_model.SSD300Model)
  else:
    # Consistency fix: use the module-level _TF1_ONLY_STRING sentinel instead
    # of repeating the 'TF1_ONLY' literal, so the marker is defined in exactly
    # one place.
    for name, dataset_name in (('mobilenet', 'imagenet'),
                               ('nasnet', 'imagenet'),
                               ('nasnetlarge', 'imagenet'),
                               ('nasnet', 'cifar10'),
                               ('ssd300', 'coco')):
      register_model(name, dataset_name, _TF1_ONLY_STRING)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Import official resnet models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
import datasets
from models import model as model_lib
class ImagenetResnetModel(model_lib.CNNModel):
  """Official resnet models."""

  def __init__(self, resnet_size, version=2, params=None):
    """These are the parameters that work for Imagenet data.

    Args:
      resnet_size: The number of convolutional layers needed in the model.
      version: 1 or 2 for v1 or v2, respectively.
      params: params passed by BenchmarkCNN.
    """
    # Deeper networks default to a smaller batch size; unlisted depths fall
    # back to 32.
    batch_size = {50: 128, 101: 32, 152: 32}.get(resnet_size, 32)
    # Linear scaling: 0.0125 per 32 images in the batch.
    default_learning_rate = 0.0125 * batch_size / 32
    model_name = 'official_resnet_{}_v{}'.format(resnet_size, version)
    super(ImagenetResnetModel, self).__init__(
        model_name, 224, batch_size, default_learning_rate, params=params)
    self.resnet_size = resnet_size
    self.version = version

  def get_learning_rate(self, global_step, batch_size):
    """Piecewise-constant schedule decaying at epochs 30/60/80/90."""
    batches_per_epoch = (
        float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
    boundaries = [int(batches_per_epoch * epoch) for epoch in (30, 60, 80, 90)]
    # Rescale the configured rate linearly with the actual batch size.
    adjusted_learning_rate = (
        self.learning_rate / self.default_batch_size * batch_size)
    values = [adjusted_learning_rate * v for v in (1, 0.1, 0.01, 0.001, 0.0001)]
    return tf.train.piecewise_constant(global_step, boundaries, values)

  def build_network(self, images, phase_train=True, nclass=1001,
                    data_type=tf.float32):
    """Builds the official model-garden ResNet and returns its logits."""
    # pylint: disable=g-import-not-at-top
    try:
      from official.r1.resnet.imagenet_main import ImagenetModel
    except ImportError:
      tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.')
      raise
    images = tf.cast(images, data_type)
    # The official model dtype seems to be ignored, as the dtype it uses is
    # the dtype of the input images. Doesn't hurt to set it though.
    resnet = ImagenetModel(resnet_size=self.resnet_size,
                           resnet_version=self.version,
                           dtype=data_type)
    logits = resnet(images, phase_train)
    logits = tf.cast(logits, tf.float32)
    return model_lib.BuildNetworkResult(logits=logits, extra_info=None)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Overfeat model configuration.
References:
OverFeat: Integrated Recognition, Localization and Detection using
Convolutional Networks
Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus,
Yann LeCun, 2014
http://arxiv.org/abs/1312.6229
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from models import model
class OverfeatModel(model.CNNModel):
  """OverfeatModel.

  Configuration for the OverFeat network (Sermanet et al., 2014; see the
  module docstring for the reference).
  """

  def __init__(self, params=None):
    # Model name 'overfeat', input image size 231, default batch size 32,
    # default learning rate 0.005.
    super(OverfeatModel, self).__init__(
        'overfeat', 231, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Appends the OverFeat conv and fully-connected layers to `cnn`."""
    # Note: VALID requires padding the images by 3 in width and height
    # Stage 1: 96 filters of 11x11 with stride 4, then 2x2 max pool.
    cnn.conv(96, 11, 11, 4, 4, mode='VALID')
    cnn.mpool(2, 2)
    # Stage 2: 256 filters of 5x5, then 2x2 max pool.
    cnn.conv(256, 5, 5, 1, 1, mode='VALID')
    cnn.mpool(2, 2)
    # Stages 3-5: 3x3 convolutions (default padding mode), then 2x2 max pool.
    cnn.conv(512, 3, 3)
    cnn.conv(1024, 3, 3)
    cnn.conv(1024, 3, 3)
    cnn.mpool(2, 2)
    # Flatten the 6x6x1024 feature map and apply two dropout-regularized
    # fully-connected layers; the final logits layer is added by the caller.
    cnn.reshape([-1, 1024 * 6 * 6])
    cnn.affine(3072)
    cnn.dropout()
    cnn.affine(4096)
    cnn.dropout()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnet model configuration.
References:
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition
arXiv:1512.03385 (2015)
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks
arXiv:1603.05027 (2016)
Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy,
Alan L. Yuille
DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
Atrous Convolution, and Fully Connected CRFs
arXiv:1606.00915 (2016)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
import datasets
import mlperf
from models import model as model_lib
def bottleneck_block_v1(cnn, depth, depth_bottleneck, stride):
  """Bottleneck block with identity short-cut for ResNet v1.

  Args:
    cnn: the network to append bottleneck blocks.
    depth: the number of output filters for this bottleneck block.
    depth_bottleneck: the number of bottleneck filters for this block.
    stride: Stride used in the first layer of the bottleneck block.
  """
  input_layer = cnn.top_layer
  in_size = cnn.top_size
  # Give every block a unique variable-scope name by counting instances.
  name_key = 'resnet_v1'
  name = name_key + str(cnn.counts[name_key])
  cnn.counts[name_key] += 1
  with tf.variable_scope(name):
    if depth == in_size:
      if stride == 1:
        # Identity shortcut: the input passes straight through.
        shortcut = input_layer
      else:
        # Same channel count but spatially strided: downsample the shortcut
        # with a 1x1 average pool rather than a projection conv.
        shortcut = cnn.apool(
            1, 1, stride, stride, input_layer=input_layer,
            num_channels_in=in_size)
        mlperf.logger.log_projection(input_tensor=input_layer,
                                     output_tensor=shortcut)
    else:
      # Channel counts differ: project with a strided 1x1 conv + batch norm.
      shortcut = cnn.conv(
          depth, 1, 1, stride, stride, activation=None,
          use_batch_norm=True, input_layer=input_layer,
          num_channels_in=in_size, bias=None)
    # Residual branch: 1x1 reduce -> 3x3 -> 1x1 expand. In v1 the stride is
    # applied on the first 1x1 conv (contrast with v1.5, which strides the
    # 3x3 conv instead).
    cnn.conv(depth_bottleneck, 1, 1, stride, stride,
             input_layer=input_layer, num_channels_in=in_size,
             use_batch_norm=True, bias=None)
    cnn.conv(depth_bottleneck, 3, 3, 1, 1, mode='SAME_RESNET',
             use_batch_norm=True, bias=None)
    # Final 1x1 conv has no activation; the ReLU is applied after the add.
    res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
                   use_batch_norm=True, bias=None)
    mlperf.logger.log(key=mlperf.tags.MODEL_HP_SHORTCUT_ADD)
    mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
    output = tf.nn.relu(shortcut + res)
    cnn.top_layer = output
    cnn.top_size = depth
def bottleneck_block_v1_5(cnn, depth, depth_bottleneck, stride):
  """Bottleneck block with identity short-cut for ResNet v1.5.

  ResNet v1.5 is the informal name for ResNet v1 where stride 2 is used in the
  first 3x3 convolution of each block instead of the first 1x1 convolution.

  First seen at https://github.com/facebook/fb.resnet.torch. Used in the paper
  "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour"
  (arXiv:1706.02677v2) and by fast.ai to train to accuracy in 45 epochs using
  multiple image sizes.

  Args:
    cnn: the network to append bottleneck blocks.
    depth: the number of output filters for this bottleneck block.
    depth_bottleneck: the number of bottleneck filters for this block.
    stride: Stride used in the first layer of the bottleneck block.
  """
  input_layer = cnn.top_layer
  in_size = cnn.top_size
  # Give every block a unique variable-scope name by counting instances.
  name_key = 'resnet_v1.5'
  name = name_key + str(cnn.counts[name_key])
  cnn.counts[name_key] += 1
  with tf.variable_scope(name):
    if depth == in_size:
      if stride == 1:
        # Identity shortcut: the input passes straight through.
        shortcut = input_layer
      else:
        # Same channel count but spatially strided: downsample the shortcut
        # with a 1x1 average pool.
        shortcut = cnn.apool(
            1, 1, stride, stride, input_layer=input_layer,
            num_channels_in=in_size)
        mlperf.logger.log_projection(input_tensor=input_layer,
                                     output_tensor=shortcut)
    else:
      # Channel counts differ: project with a strided 1x1 conv + batch norm.
      shortcut = cnn.conv(
          depth, 1, 1, stride, stride, activation=None,
          use_batch_norm=True, input_layer=input_layer,
          num_channels_in=in_size, bias=None)
      mlperf.logger.log_projection(input_tensor=input_layer,
                                   output_tensor=shortcut)
    # Residual branch: unlike v1, the first 1x1 conv is unstrided and the
    # stride is applied to the 3x3 conv instead.
    cnn.conv(depth_bottleneck, 1, 1, 1, 1,
             input_layer=input_layer, num_channels_in=in_size,
             use_batch_norm=True, bias=None)
    cnn.conv(depth_bottleneck, 3, 3, stride, stride, mode='SAME_RESNET',
             use_batch_norm=True, bias=None)
    # Final 1x1 conv has no activation; the ReLU is applied after the add.
    res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
                   use_batch_norm=True, bias=None)
    mlperf.logger.log(key=mlperf.tags.MODEL_HP_SHORTCUT_ADD)
    mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
    output = tf.nn.relu(shortcut + res)
    cnn.top_layer = output
    cnn.top_size = depth
def bottleneck_block_v2(cnn, depth, depth_bottleneck, stride):
  """Bottleneck block with identity short-cut for ResNet v2.

  The main difference from v1 is that a batch norm and relu are done at the
  start of the block, instead of the end. This initial batch norm and relu is
  collectively called a pre-activation.

  Args:
    cnn: the network to append bottleneck blocks.
    depth: the number of output filters for this bottleneck block.
    depth_bottleneck: the number of bottleneck filters for this block.
    stride: Stride used in the first layer of the bottleneck block.
  """
  input_layer = cnn.top_layer
  in_size = cnn.top_size
  # Give every block a unique variable-scope name by counting instances.
  name_key = 'resnet_v2'
  name = name_key + str(cnn.counts[name_key])
  cnn.counts[name_key] += 1
  # Pre-activation: batch norm + relu applied before the block's convs.
  preact = cnn.batch_norm()
  mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
  preact = tf.nn.relu(preact)
  with tf.variable_scope(name):
    if depth == in_size:
      if stride == 1:
        # Identity shortcut taken from the raw input (pre-activation is NOT
        # applied to the shortcut path).
        shortcut = input_layer
      else:
        # Same channel count but spatially strided: downsample the shortcut
        # with a 1x1 average pool.
        shortcut = cnn.apool(
            1, 1, stride, stride, input_layer=input_layer,
            num_channels_in=in_size)
        mlperf.logger.log_projection(input_tensor=input_layer,
                                     output_tensor=shortcut)
    else:
      # Projection shortcut computed from the pre-activated tensor, without
      # its own batch norm (per the v2 formulation).
      shortcut = cnn.conv(
          depth, 1, 1, stride, stride, activation=None, use_batch_norm=False,
          input_layer=preact, num_channels_in=in_size, bias=None)
    # Residual branch starts from the pre-activated tensor: 1x1 reduce
    # (strided) -> 3x3 -> 1x1 expand.
    cnn.conv(depth_bottleneck, 1, 1, stride, stride,
             input_layer=preact, num_channels_in=in_size,
             use_batch_norm=True, bias=None)
    cnn.conv(depth_bottleneck, 3, 3, 1, 1, mode='SAME_RESNET',
             use_batch_norm=True, bias=None)
    # No batch norm or relu at the end of a v2 block: the next block's
    # pre-activation (or the network's final batch norm + relu) supplies them.
    res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
                   use_batch_norm=False, bias=None)
    mlperf.logger.log(key=mlperf.tags.MODEL_HP_SHORTCUT_ADD)
    output = shortcut + res
    cnn.top_layer = output
    cnn.top_size = depth
def bottleneck_block(cnn, depth, depth_bottleneck, stride, version):
  """Bottleneck block with identity short-cut.

  Args:
    cnn: the network to append bottleneck blocks.
    depth: the number of output filters for this bottleneck block.
    depth_bottleneck: the number of bottleneck filters for this block.
    stride: Stride used in the first layer of the bottleneck block.
    version: version of ResNet to build.
  """
  mlperf.logger.log(key=mlperf.tags.MODEL_HP_BLOCK_TYPE,
                    value=mlperf.tags.BOTTLENECK_BLOCK)
  mlperf.logger.log_begin_block(
      input_tensor=cnn.top_layer, block_type=mlperf.tags.BOTTLENECK_BLOCK)
  # Dispatch on the ResNet version; anything that is not 'v2' or 'v1.5'
  # (i.e. 'v1') gets the original v1 block.
  block_fn = {
      'v2': bottleneck_block_v2,
      'v1.5': bottleneck_block_v1_5,
  }.get(version, bottleneck_block_v1)
  block_fn(cnn, depth, depth_bottleneck, stride)
  mlperf.logger.log_end_block(output_tensor=cnn.top_layer)
def residual_block(cnn, depth, stride, version, projection_shortcut=False):
  """Residual block with identity short-cut.

  Args:
    cnn: the network to append residual blocks.
    depth: the number of output filters for this residual block.
    stride: Stride used in the first layer of the residual block.
    version: version of ResNet to build.
    projection_shortcut: indicator of using projection shortcut, even if top
      size and depth are equal
  """
  # Only v2 uses pre-activation (batch norm + relu before the convs).
  pre_activation = True if version == 'v2' else False
  input_layer = cnn.top_layer
  in_size = cnn.top_size
  if projection_shortcut:
    # Forced projection: strided 1x1 conv + batch norm.
    shortcut = cnn.conv(
        depth, 1, 1, stride, stride, activation=None,
        use_batch_norm=True, input_layer=input_layer,
        num_channels_in=in_size, bias=None)
  elif in_size != depth:
    # Plan A of shortcut.
    # Downsample spatially with a 1x1 average pool, then zero-pad the
    # channel dimension up to `depth` instead of projecting.
    shortcut = cnn.apool(1, 1, stride, stride,
                         input_layer=input_layer,
                         num_channels_in=in_size)
    padding = (depth - in_size) // 2
    if cnn.channel_pos == 'channels_last':
      shortcut = tf.pad(
          shortcut, [[0, 0], [0, 0], [0, 0], [padding, padding]])
    else:
      shortcut = tf.pad(
          shortcut, [[0, 0], [padding, padding], [0, 0], [0, 0]])
  else:
    # Identity shortcut.
    shortcut = input_layer
  if pre_activation:
    res = cnn.batch_norm(input_layer)
    res = tf.nn.relu(res)
  else:
    res = input_layer
  # First 3x3 conv carries the stride.
  cnn.conv(depth, 3, 3, stride, stride,
           input_layer=res, num_channels_in=in_size,
           use_batch_norm=True, bias=None)
  if pre_activation:
    # v2: no final batch norm or relu; just add the shortcut.
    res = cnn.conv(depth, 3, 3, 1, 1, activation=None,
                   use_batch_norm=False, bias=None)
    output = shortcut + res
  else:
    # v1: batch norm on the second conv and relu after the add.
    res = cnn.conv(depth, 3, 3, 1, 1, activation=None,
                   use_batch_norm=True, bias=None)
    output = tf.nn.relu(shortcut + res)
  cnn.top_layer = output
  cnn.top_size = depth
class ResnetModel(model_lib.CNNModel):
  """Resnet cnn network configuration."""
  def __init__(self, model, layer_counts, params=None):
    """Creates the model config.

    Args:
      model: model name, e.g. 'resnet50', 'resnet101_v2', 'resnet50_v1.5'.
        The variant suffix ('v2', 'v1.5', or none) selects self.version.
      layer_counts: tuple of 4 ints, number of bottleneck blocks per stage.
      params: optional benchmark params; a truthy params.resnet_base_lr
        overrides the default base learning rate.
    """
    # Per-model default (per-device) batch size; unknown models fall back
    # to 32 below.
    default_batch_sizes = {
        'resnet50': 64,
        'resnet101': 32,
        'resnet152': 32,
        'resnet50_v1.5': 64,
        'resnet101_v1.5': 32,
        'resnet152_v1.5': 32,
        'resnet50_v2': 64,
        'resnet101_v2': 32,
        'resnet152_v2': 32,
    }
    batch_size = default_batch_sizes.get(model, 32)
    # The ResNet paper uses a starting lr of .1 at bs=256.
    self.base_lr_batch_size = 256
    base_lr = 0.128
    if params and params.resnet_base_lr:
      base_lr = params.resnet_base_lr
    # 224 is the input image size (ImageNet models).
    super(ResnetModel, self).__init__(model, 224, batch_size, base_lr,
                                      layer_counts, params=params)
    # Infer the architecture variant from the model-name suffix.
    if 'v2' in model:
      self.version = 'v2'
    elif 'v1.5' in model:
      self.version = 'v1.5'
    else:
      self.version = 'v1'
  def add_inference(self, cnn):
    """Builds the ResNet graph on top of cnn's current top layer.

    Raises:
      ValueError: if layer_counts was not provided at construction.
    """
    if self.layer_counts is None:
      raise ValueError('Layer counts not specified for %s' % self.get_model())
    # Drop batch size from shape logging.
    mlperf.logger.log(key=mlperf.tags.MODEL_HP_INITIAL_SHAPE,
                      value=cnn.top_layer.shape.as_list()[1:])
    cnn.use_batch_norm = True
    cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
    # Stem: 7x7/2 conv followed by 3x3/2 max pool.
    cnn.conv(64, 7, 7, 2, 2, mode='SAME_RESNET', use_batch_norm=True)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # Four stages; stages 2-4 downsample in their first block (stride 2).
    for _ in xrange(self.layer_counts[0]):
      bottleneck_block(cnn, 256, 64, 1, self.version)
    for i in xrange(self.layer_counts[1]):
      stride = 2 if i == 0 else 1
      bottleneck_block(cnn, 512, 128, stride, self.version)
    for i in xrange(self.layer_counts[2]):
      stride = 2 if i == 0 else 1
      bottleneck_block(cnn, 1024, 256, stride, self.version)
    for i in xrange(self.layer_counts[3]):
      stride = 2 if i == 0 else 1
      bottleneck_block(cnn, 2048, 512, stride, self.version)
    if self.version == 'v2':
      # v2 (pre-activation) ends with a final batch norm + ReLU before pooling.
      cnn.batch_norm()
      cnn.top_layer = tf.nn.relu(cnn.top_layer)
    cnn.spatial_mean()
  def get_learning_rate(self, global_step, batch_size):
    """Returns the lr: 5-epoch linear warmup, then decay at epochs 30/60/80/90."""
    rescaled_lr = self.get_scaled_base_learning_rate(batch_size)
    num_batches_per_epoch = (
        datasets.IMAGENET_NUM_TRAIN_IMAGES / batch_size)
    boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
    # One more value than boundaries: lr factor before/between/after them.
    values = [1, 0.1, 0.01, 0.001, 0.0001]
    values = [rescaled_lr * v for v in values]
    lr = tf.train.piecewise_constant(global_step, boundaries, values)
    warmup_steps = int(num_batches_per_epoch * 5)
    mlperf.logger.log(key=mlperf.tags.OPT_LR_WARMUP_STEPS, value=warmup_steps)
    # Linear ramp from 0 to rescaled_lr over the warmup period.
    warmup_lr = (
        rescaled_lr * tf.cast(global_step, tf.float32) / tf.cast(
            warmup_steps, tf.float32))
    return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
  def get_scaled_base_learning_rate(self, batch_size):
    """Calculates base learning rate for creating lr schedule.

    In replicated mode, gradients are summed rather than averaged which, with
    the sgd and momentum optimizers, increases the effective learning rate by
    lr * num_gpus. Dividing the base lr by num_gpus negates the increase.

    Args:
      batch_size: Total batch-size.

    Returns:
      Base learning rate to use to create lr schedule.
    """
    base_lr = self.learning_rate
    if self.params.variable_update == 'replicated':
      base_lr = self.learning_rate / self.params.num_gpus
    # Scale linearly with the total batch size relative to the paper's 256.
    scaled_lr = base_lr * (batch_size / self.base_lr_batch_size)
    return scaled_lr
def create_resnet50_model(params):
  """Returns a ResnetModel configured as ResNet-50 (v1)."""
  return ResnetModel('resnet50', (3, 4, 6, 3), params=params)
def create_resnet50_v1_5_model(params):
  """Returns a ResnetModel configured as ResNet-50 v1.5."""
  return ResnetModel('resnet50_v1.5', (3, 4, 6, 3), params=params)
def create_resnet50_v2_model(params):
  """Returns a ResnetModel configured as ResNet-50 v2 (pre-activation)."""
  return ResnetModel('resnet50_v2', (3, 4, 6, 3), params=params)
def create_resnet101_model(params):
  """Returns a ResnetModel configured as ResNet-101 (v1)."""
  return ResnetModel('resnet101', (3, 4, 23, 3), params=params)
def create_resnet101_v2_model(params):
  """Returns a ResnetModel configured as ResNet-101 v2 (pre-activation)."""
  return ResnetModel('resnet101_v2', (3, 4, 23, 3), params=params)
def create_resnet152_model(params):
  """Returns a ResnetModel configured as ResNet-152 (v1)."""
  return ResnetModel('resnet152', (3, 8, 36, 3), params=params)
def create_resnet152_v2_model(params):
  """Returns a ResnetModel configured as ResNet-152 v2 (pre-activation)."""
  return ResnetModel('resnet152_v2', (3, 8, 36, 3), params=params)
class ResnetCifar10Model(model_lib.CNNModel):
  """Resnet cnn network configuration for Cifar 10 dataset.

  V1 model architecture follows the one defined in the paper:
  https://arxiv.org/pdf/1512.03385.pdf.

  V2 model architecture follows the one defined in the paper:
  https://arxiv.org/pdf/1603.05027.pdf.
  """
  def __init__(self, model, layer_counts, params=None):
    """Creates the config; infers v1/v2 from the model name suffix.

    Args:
      model: model name, e.g. 'resnet20' or 'resnet20_v2'.
      layer_counts: tuple of 3 ints, residual blocks per stage.
      params: optional benchmark params.
    """
    if 'v2' in model:
      self.version = 'v2'
    else:
      self.version = 'v1'
    # Image size 32, default batch size 128, base learning rate 0.1.
    super(ResnetCifar10Model, self).__init__(
        model, 32, 128, 0.1, layer_counts, params=params)
  def add_inference(self, cnn):
    """Builds the Cifar-10 ResNet graph on top of cnn's current top layer."""
    if self.layer_counts is None:
      raise ValueError('Layer counts not specified for %s' % self.get_model())
    cnn.use_batch_norm = True
    cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
    # Stem conv; v1 defers the ReLU to batch norm inside the first block.
    if self.version == 'v2':
      cnn.conv(16, 3, 3, 1, 1, use_batch_norm=True)
    else:
      cnn.conv(16, 3, 3, 1, 1, activation=None, use_batch_norm=True)
    for i in xrange(self.layer_counts[0]):
      # reshape to batch_size x 16 x 32 x 32
      residual_block(cnn, 16, 1, self.version)
    for i in xrange(self.layer_counts[1]):
      # Subsampling is performed at the first convolution with a stride of 2
      stride = 2 if i == 0 else 1
      # reshape to batch_size x 32 x 16 x 16
      residual_block(cnn, 32, stride, self.version)
    for i in xrange(self.layer_counts[2]):
      stride = 2 if i == 0 else 1
      # reshape to batch_size x 64 x 8 x 8
      residual_block(cnn, 64, stride, self.version)
    if self.version == 'v2':
      # v2 (pre-activation) ends with a final batch norm + ReLU before pooling.
      cnn.batch_norm()
      cnn.top_layer = tf.nn.relu(cnn.top_layer)
    cnn.spatial_mean()
  def get_learning_rate(self, global_step, batch_size):
    """Piecewise-constant lr, stepped at epochs 82, 123 and 300."""
    num_batches_per_epoch = int(50000 / batch_size)  # Cifar-10 train set size.
    boundaries = num_batches_per_epoch * np.array([82, 123, 300],
                                                  dtype=np.int64)
    boundaries = [x for x in boundaries]
    # One more value than boundaries: lr used before/between/after them.
    values = [0.1, 0.01, 0.001, 0.0002]
    return tf.train.piecewise_constant(global_step, boundaries, values)
def create_resnet20_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-20 (v1)."""
  return ResnetCifar10Model('resnet20', (3, 3, 3), params=params)
def create_resnet20_v2_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-20 v2."""
  return ResnetCifar10Model('resnet20_v2', (3, 3, 3), params=params)
def create_resnet32_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-32 (v1)."""
  return ResnetCifar10Model('resnet32', (5, 5, 5), params=params)
def create_resnet32_v2_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-32 v2."""
  return ResnetCifar10Model('resnet32_v2', (5, 5, 5), params=params)
def create_resnet44_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-44 (v1)."""
  return ResnetCifar10Model('resnet44', (7, 7, 7), params=params)
def create_resnet44_v2_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-44 v2."""
  return ResnetCifar10Model('resnet44_v2', (7, 7, 7), params=params)
def create_resnet56_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-56 (v1)."""
  return ResnetCifar10Model('resnet56', (9, 9, 9), params=params)
def create_resnet56_v2_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-56 v2."""
  return ResnetCifar10Model('resnet56_v2', (9, 9, 9), params=params)
def create_resnet110_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-110 (v1)."""
  return ResnetCifar10Model('resnet110', (18, 18, 18), params=params)
def create_resnet110_v2_cifar_model(params):
  """Returns a ResnetCifar10Model configured as ResNet-110 v2."""
  return ResnetCifar10Model('resnet110_v2', (18, 18, 18), params=params)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for resnet_model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import mock
import tensorflow.compat.v1 as tf
from models import resnet_model
class ResNetModelTest(tf.test.TestCase):
  """Unit tests for ResnetModel.get_scaled_base_learning_rate."""
  def testGetScaledBaseLearningRateOneGpuLrFromParams(self):
    """Verifies setting params.resnet_base_lr pipes through."""
    lr = self._get_scaled_base_learning_rate(1,
                                             'parameter_server',
                                             256,
                                             base_lr=.050)
    self.assertEqual(lr, .050)
  def testGetScaledBaseLearningRateOneGpu(self):
    # Default base lr 0.128 scaled by 128/256.
    lr = self._get_scaled_base_learning_rate(1, 'parameter_server', 128)
    self.assertEqual(lr, .064)
  def testGetScaledBaseLearningRateEightGpuReplicated(self):
    # Replicated mode divides by num_gpus; batch scaling multiplies by 8.
    lr = self._get_scaled_base_learning_rate(8, 'replicated', 256 * 8)
    self.assertEqual(lr, .128)
  def testGetScaledBaseLearningRateTwoGpuParameter(self):
    lr = self._get_scaled_base_learning_rate(2, 'parameter_server', 256 * 2)
    self.assertEqual(lr, .256)
  def testGetScaledBaseLearningRateTwoGpuUneven(self):
    lr = self._get_scaled_base_learning_rate(2, 'replicated', 13)
    self.assertEqual(lr, 0.0032500000000000003)
  def _get_scaled_base_learning_rate(self,
                                     num_gpus,
                                     variable_update,
                                     batch_size,
                                     base_lr=None):
    """Simplifies testing different learning rate calculations.

    Args:
      num_gpus: Number of GPUs to be used.
      variable_update: Type of variable update used.
      batch_size: Total batch size.
      base_lr: Base learning rate before scaling; None keeps the model default.

    Returns:
      Base learning rate that would be used to create lr schedule.
    """
    params = mock.Mock()
    params.num_gpus = num_gpus
    params.variable_update = variable_update
    # Always assign resnet_base_lr explicitly. With a bare mock.Mock(),
    # leaving it unset makes attribute access auto-create a truthy Mock,
    # which ResnetModel.__init__ would then use as the base learning rate
    # instead of its 0.128 default. Assigning None (falsy) keeps the default.
    params.resnet_base_lr = base_lr
    # layer_counts is irrelevant for the lr computation under test.
    resnet50_model = resnet_model.ResnetModel('resnet50', 50, params=params)
    return resnet50_model.get_scaled_base_learning_rate(batch_size)
if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mobilenet Base Class, branched from slim for fp16 performance study."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import contextlib
import copy
import os
import tensorflow.compat.v1 as tf
from tensorflow.contrib import slim as contrib_slim
slim = contrib_slim
@slim.add_arg_scope
def apply_activation(x, name=None, activation_fn=None):
  """Applies `activation_fn` to `x` when one is given; otherwise returns `x`."""
  if activation_fn:
    return activation_fn(x, name=name)
  return x
def _fixed_padding(inputs, kernel_size, rate=1):
  """Pads the input along the spatial dimensions independently of input size.

  Pads the input such that if it was used in a convolution with 'VALID' padding,
  the output would have the same dimensions as if the unpadded input was used
  in a convolution with 'SAME' padding.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
    rate: An integer, rate for atrous convolution.

  Returns:
    output: A tensor of size [batch, height_out, width_out, channels] with the
    input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  # Fix: the width entry previously reused kernel_size[0]; use kernel_size[1]
  # so non-square kernels pad correctly. Identical behavior for the square
  # kernels used throughout this file.
  kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
                           kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
  pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
  pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
  pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
  padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
                                  [pad_beg[1], pad_end[1]], [0, 0]])
  return padded_inputs
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
@contextlib.contextmanager
def _set_arg_scope_defaults(defaults):
  """Sets arg scope defaults for all items present in defaults.

  Args:
    defaults: dictionary/list of pairs, containing a mapping from
      function to a dictionary of default args.

  Yields:
    context manager where all defaults are set.
  """
  if hasattr(defaults, 'items'):
    items = list(defaults.items())
  else:
    items = defaults
  if not items:
    yield
  else:
    # Recursively nest one slim.arg_scope per (func, defaults) pair so every
    # entry is active inside the yielded context.
    func, default_arg = items[0]
    with slim.arg_scope(func, **default_arg):
      with _set_arg_scope_defaults(items[1:]):
        yield
@slim.add_arg_scope
def depth_multiplier(output_params,
                     multiplier,
                     divisible_by=8,
                     min_depth=8,
                     **unused_kwargs):
  """Scales the 'num_outputs' entry of `output_params` in place.

  The scaled depth is rounded to a multiple of `divisible_by` and is at least
  `min_depth`. Params without a 'num_outputs' key are left untouched.
  """
  if 'num_outputs' not in output_params:
    return
  current_depth = output_params['num_outputs']
  output_params['num_outputs'] = _make_divisible(
      current_depth * multiplier, divisible_by, min_depth)
_Op = collections.namedtuple('Op', ['op', 'params', 'multiplier_func'])
def op(opfunc, **params):
multiplier = params.pop('multiplier_transorm', depth_multiplier)
return _Op(opfunc, params=params, multiplier_func=multiplier)
class NoOpScope(object):
  """Context manager that does nothing (exceptions propagate normally)."""
  def __enter__(self):
    return None
  def __exit__(self, exc_type, exc_value, traceback):
    # Returning False ensures any exception is re-raised.
    return False
def safe_arg_scope(funcs, **kwargs):
  """Returns `slim.arg_scope` with all None arguments removed.

  Args:
    funcs: Functions to pass to `arg_scope`.
    **kwargs: Arguments to pass to `arg_scope`.

  Returns:
    arg_scope or No-op context manager.

  Note: can be useful if None value should be interpreted as "do not overwrite
    this parameter value".
  """
  cleaned_args = {key: value for key, value in kwargs.items()
                  if value is not None}
  if not cleaned_args:
    return NoOpScope()
  return slim.arg_scope(funcs, **cleaned_args)
@slim.add_arg_scope
def mobilenet_base(  # pylint: disable=invalid-name
    inputs,
    conv_defs,
    multiplier=1.0,
    final_endpoint=None,
    output_stride=None,
    use_explicit_padding=False,
    scope=None,
    is_training=False):
  """Mobilenet base network.

  Constructs a network from inputs to the given final endpoint. By default
  the network is constructed in inference mode. To create network
  in training mode use:

  with slim.arg_scope(mobilenet.training_scope()):
     logits, endpoints = mobilenet_base(...)

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    conv_defs: A list of op(...) layers specifying the net architecture.
    multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    final_endpoint: The name of last layer, for early termination.
      For V1-based networks: last layer is "layer_14", for V2: "layer_20".
    output_stride: An integer that specifies the requested ratio of input to
      output spatial resolution. If not None, then we invoke atrous convolution
      if necessary to prevent the network from reducing the spatial resolution
      of the activation maps. Allowed values are 1 or any even number, excluding
      zero. Typical values are 8 (accurate fully convolutional mode), 16
      (fast fully convolutional mode), and 32 (classification mode).

      NOTE- output_stride relies on all consequent operators to support dilated
      operators via "rate" parameter. This might require wrapping non-conv
      operators to operate properly.

    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
      inputs so that the output dimensions are the same as if 'SAME' padding
      were used.
    scope: optional variable scope.
    is_training: How to setup batch_norm and other ops. Note: most of the time
      this does not need be set directly. Use mobilenet.training_scope() to set
      up training instead. This parameter is here for backward compatibility
      only. It is safe to set it to the value matching
      training_scope(is_training=...). It is also safe to explicitly set
      it to False, even if there is outer training_scope set to to training.
      (The network will be built in inference mode). If this is set to None,
      no arg_scope is added for slim.batch_norm's is_training parameter.

  Returns:
    tensor_out: output tensor.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: depth_multiplier <= 0, or the target output_stride is not
                allowed.
  """
  if multiplier <= 0:
    raise ValueError('multiplier is not greater than zero.')
  # Set conv defs defaults and overrides.
  conv_defs_defaults = conv_defs.get('defaults', {})
  conv_defs_overrides = conv_defs.get('overrides', {})
  if use_explicit_padding:
    # Deep-copy before mutating so the caller's conv_defs stay untouched.
    conv_defs_overrides = copy.deepcopy(conv_defs_overrides)
    conv_defs_overrides[
        (slim.conv2d, slim.separable_conv2d)] = {'padding': 'VALID'}
  if output_stride is not None:
    if output_stride == 0 or (output_stride > 1 and output_stride % 2):
      raise ValueError('Output stride must be None, 1 or a multiple of 2.')
  # a) Set the tensorflow scope
  # b) set padding to default: note we might consider removing this
  # since it is also set by mobilenet_scope
  # c) set all defaults
  # d) set all extra overrides.
  with _scope_all(scope, default_scope='Mobilenet'), \
      safe_arg_scope([slim.batch_norm], is_training=is_training), \
      _set_arg_scope_defaults(conv_defs_defaults), \
      _set_arg_scope_defaults(conv_defs_overrides):
    # The current_stride variable keeps track of the output stride of the
    # activations, i.e., the running product of convolution strides up to the
    # current network layer. This allows us to invoke atrous convolution
    # whenever applying the next convolution would result in the activations
    # having output stride larger than the target output_stride.
    current_stride = 1
    # The atrous convolution rate parameter.
    rate = 1
    net = inputs
    # Insert default parameters before the base scope which includes
    # any custom overrides set in mobilenet.
    end_points = {}
    # Maps variable-scope name -> endpoint name; used below to re-export
    # every tensor whose name ends with 'output'.
    scopes = {}
    for i, opdef in enumerate(conv_defs['spec']):
      params = dict(opdef.params)
      # Rescale this layer's depth by the network-wide multiplier.
      opdef.multiplier_func(params, multiplier)
      stride = params.get('stride', 1)
      if output_stride is not None and current_stride == output_stride:
        # If we have reached the target output_stride, then we need to employ
        # atrous convolution with stride=1 and multiply the atrous rate by the
        # current unit's stride for use in subsequent layers.
        layer_stride = 1
        layer_rate = rate
        rate *= stride
      else:
        layer_stride = stride
        layer_rate = 1
        current_stride *= stride
      # Update params.
      params['stride'] = layer_stride
      # Only insert rate to params if rate > 1.
      if layer_rate > 1:
        params['rate'] = layer_rate
      # Set padding
      if use_explicit_padding:
        if 'kernel_size' in params:
          net = _fixed_padding(net, params['kernel_size'], layer_rate)
        else:
          params['use_explicit_padding'] = True
      end_point = 'layer_%d' % (i + 1)
      try:
        net = opdef.op(net, **params)
      except Exception:
        # Re-raise with context about which layer definition failed.
        print('Failed to create op %i: %r params: %r' % (i, opdef, params))
        raise
      end_points[end_point] = net
      scope = os.path.dirname(net.name)
      scopes[scope] = end_point
      if final_endpoint is not None and end_point == final_endpoint:
        break
    # Add all tensors that end with 'output' to
    # endpoints
    for t in net.graph.get_operations():
      scope = os.path.dirname(t.name)
      bn = os.path.basename(t.name)
      if scope in scopes and t.name.endswith('output'):
        end_points[scopes[scope] + '/' + bn] = t.outputs[0]
    return net, end_points
@contextlib.contextmanager
def _scope_all(scope, default_scope=None):
  """Opens `scope` as both a variable scope and the matching name scope."""
  with tf.variable_scope(scope, default_name=default_scope) as s,\
      tf.name_scope(s.original_name_scope):
    yield s
@slim.add_arg_scope
def mobilenet(inputs,
              num_classes=1001,
              prediction_fn=slim.softmax,
              reuse=None,
              scope='Mobilenet',
              base_only=False,
              **mobilenet_args):
  """Mobilenet model for classification, supports both V1 and V2.

  Note: default mode is inference, use mobilenet.training_scope to create
  training network.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    prediction_fn: a function to get predictions out of logits
      (default softmax).
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    base_only: if True will only create the base of the network (no pooling
      and no logits).
    **mobilenet_args: passed to mobilenet_base verbatim.
      - conv_defs: list of conv defs
      - multiplier: Float multiplier for the depth (number of channels)
        for all convolution ops. The value must be greater than zero. Typical
        usage will be to set this value in (0, 1) to reduce the number of
        parameters or computation cost of the model.
      - output_stride: will ensure that the last layer has at most total stride.
        If the architecture calls for more stride than that provided
        (e.g. output_stride=16, but the architecture has 5 stride=2 operators),
        it will replace output_stride with fractional convolutions using Atrous
        Convolutions.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the corresponding
      activation tensor.

  Raises:
    ValueError: Input rank is invalid.
  """
  is_training = mobilenet_args.get('is_training', False)
  input_shape = inputs.get_shape().as_list()
  if len(input_shape) != 4:
    raise ValueError('Expected rank 4 input, was: %d' % len(input_shape))
  with tf.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope:
    inputs = tf.identity(inputs, 'input')
    net, end_points = mobilenet_base(inputs, scope=scope, **mobilenet_args)
    if base_only:
      return net, end_points
    net = tf.identity(net, name='embedding')
    with tf.variable_scope('Logits'):
      net = global_pool(net)
      end_points['global_pool'] = net
      if not num_classes:
        # No logits layer requested: return the pooled features directly.
        return net, end_points
      net = slim.dropout(net, scope='Dropout', is_training=is_training)
      # 1 x 1 x num_classes
      # Note: legacy scope name.
      logits = slim.conv2d(
          net,
          num_classes, [1, 1],
          activation_fn=None,
          normalizer_fn=None,
          biases_initializer=tf.zeros_initializer(),
          scope='Conv2d_1c_1x1')
    # Collapse the 1x1 spatial dims: [batch, 1, 1, C] -> [batch, C].
    logits = tf.squeeze(logits, [1, 2])
    logits = tf.identity(logits, name='output')
    end_points['Logits'] = logits
    if prediction_fn:
      end_points['Predictions'] = prediction_fn(logits, 'Predictions')
  return logits, end_points
def global_pool(input_tensor, pool_op=tf.nn.avg_pool):
  """Applies avg pool to produce 1x1 output.

  NOTE: This function is functionally equivalent to reduce_mean, but it has
  baked-in average pool which has better support across hardware.

  Args:
    input_tensor: input tensor
    pool_op: pooling op (avg pool is default)
  Returns:
    a tensor batch_size x 1 x 1 x depth.
  """
  shape = input_tensor.get_shape().as_list()
  if shape[1] is None or shape[2] is None:
    # Spatial dims unknown at graph construction time: build the kernel size
    # dynamically from the runtime shape.
    kernel_size = tf.convert_to_tensor(
        [1, tf.shape(input_tensor)[1],
         tf.shape(input_tensor)[2], 1])
  else:
    kernel_size = [1, shape[1], shape[2], 1]
  output = pool_op(
      input_tensor, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID')
  # Recover output shape, for unknown shape.
  output.set_shape([None, 1, 1, None])
  return output
def training_scope(is_training=True,
                   weight_decay=0.00004,
                   stddev=0.09,
                   dropout_keep_prob=0.8,
                   bn_decay=0.997):
  """Defines Mobilenet training scope.

  Usage:
     with tf.contrib.slim.arg_scope(mobilenet.training_scope()):
       logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

     # the network created will be trainble with dropout/batch norm
     # initialized appropriately.
  Args:
    is_training: if set to False this will ensure that all customizations are
      set to non-training mode. This might be helpful for code that is reused
      across both training/evaluation, but most of the time training_scope with
      value False is not needed. If this is set to None, the parameters is not
      added to the batch_norm arg_scope.

    weight_decay: The weight decay to use for regularizing the model.
    stddev: Standard deviation for initialization, if negative uses xavier.
    dropout_keep_prob: dropout keep probability (not set if equals to None).
    bn_decay: decay for the batch norm moving averages (not set if equals to
      None).

  Returns:
    An argument scope to use via arg_scope.
  """
  # Note: do not introduce parameters that would change the inference
  # model here (for example whether to use bias), modify conv_def instead.
  batch_norm_params = {
      'decay': bn_decay,
      'is_training': is_training
  }
  if stddev < 0:
    weight_intitializer = slim.initializers.xavier_initializer()
  else:
    weight_intitializer = tf.truncated_normal_initializer(stddev=stddev)
  # Set weight_decay for weights in Conv and FC layers.
  # None-valued entries are dropped by safe_arg_scope, so e.g. bn_decay=None
  # leaves the existing batch_norm decay untouched.
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected, slim.separable_conv2d],
      weights_initializer=weight_intitializer,
      normalizer_fn=slim.batch_norm), \
      slim.arg_scope([mobilenet_base, mobilenet], is_training=is_training),\
      safe_arg_scope([slim.batch_norm], **batch_norm_params), \
      safe_arg_scope([slim.dropout], is_training=is_training,
                     keep_prob=dropout_keep_prob), \
      slim.arg_scope([slim.conv2d], \
                     weights_regularizer=slim.l2_regularizer(weight_decay)), \
      slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s:
    return s
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Convolution blocks for mobilenet."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import functools
import tensorflow.compat.v1 as tf
from tensorflow.contrib import slim
def _fixed_padding(inputs, kernel_size, rate=1):
  """Pads the input along the spatial dimensions independently of input size.

  Pads the input such that if it was used in a convolution with 'VALID' padding,
  the output would have the same dimensions as if the unpadded input was used
  in a convolution with 'SAME' padding.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
    rate: An integer, rate for atrous convolution.

  Returns:
    output: A tensor of size [batch, height_out, width_out, channels] with the
    input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  # Fix: the width entry previously reused kernel_size[0]; use kernel_size[1]
  # so non-square kernels pad correctly. Identical behavior for the square
  # kernels used throughout this file.
  kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
                           kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
  pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
  pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
  pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
  padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
                                  [pad_beg[1], pad_end[1]], [0, 0]])
  return padded_inputs
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
def _split_divisible(num, num_ways, divisible_by=8):
"""Evenly splits num, num_ways so each piece is a multiple of divisible_by."""
assert num % divisible_by == 0
assert num // num_ways >= divisible_by
# Note: want to round down, we adjust each split to match the total.
base = num // num_ways // divisible_by * divisible_by
result = []
accumulated = 0
for i in range(num_ways):
r = base
while accumulated + r < num * (i + 1) // num_ways:
r += divisible_by
result.append(r)
accumulated += r
assert accumulated == num
return result
@contextlib.contextmanager
def _v1_compatible_scope_naming(scope):  # pylint: disable=g-missing-docstring
  # Yields a scope-name prefix: '' when an auto-generated unique scope is
  # opened, or 'scope_' so callers can build names like scope_depthwise.
  if scope is None:  # Create uniqified separable blocks.
    with tf.variable_scope(None, default_name='separable') as s, \
         tf.name_scope(s.original_name_scope):
      yield ''
  else:
    # We use scope_depthwise, scope_pointwise for compatibility with V1 ckpts.
    # which provide numbered scopes.
    scope += '_'
    yield scope
@slim.add_arg_scope
def split_separable_conv2d(input_tensor,
                           num_outputs,
                           scope=None,
                           normalizer_fn=None,
                           stride=1,
                           rate=1,
                           endpoints=None,
                           use_explicit_padding=False):
  """Separable mobilenet V1 style convolution.

  Depthwise convolution, with default non-linearity,
  followed by 1x1 depthwise convolution.  This is similar to
  slim.separable_conv2d, but differs in that it applies batch
  normalization and non-linearity to depthwise. This  matches
  the basic building of Mobilenet Paper
  (https://arxiv.org/abs/1704.04861)

  Args:
    input_tensor: input
    num_outputs: number of outputs
    scope: optional name of the scope. Note if provided it will use
      scope_depthwise for depthwise, and scope_pointwise for pointwise.
    normalizer_fn: which normalizer function to use for depthwise/pointwise
    stride: stride
    rate: output rate (also known as dilation rate)
    endpoints: optional, if provided, will export additional tensors to it.
    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
      inputs so that the output dimensions are the same as if 'SAME' padding
      were used.

  Returns:
    output tensor
  """
  with _v1_compatible_scope_naming(scope) as scope:
    dw_scope = scope + 'depthwise'
    endpoints = endpoints if endpoints is not None else {}
    kernel_size = [3, 3]
    padding = 'SAME'
    if use_explicit_padding:
      padding = 'VALID'
      input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
    # Depthwise 3x3 (num_outputs=None keeps per-channel outputs).
    net = slim.separable_conv2d(
        input_tensor,
        None,
        kernel_size,
        depth_multiplier=1,
        stride=stride,
        rate=rate,
        normalizer_fn=normalizer_fn,
        padding=padding,
        scope=dw_scope)
    endpoints[dw_scope] = net
    pw_scope = scope + 'pointwise'
    # Pointwise 1x1 projection to num_outputs channels.
    net = slim.conv2d(
        net,
        num_outputs, [1, 1],
        stride=1,
        normalizer_fn=normalizer_fn,
        scope=pw_scope)
    endpoints[pw_scope] = net
  return net
def expand_input_by_factor(n, divisible_by=8):
  """Returns a callable expanding `num_inputs` by factor `n`, rounded."""
  def _expand(num_inputs, **_):
    return _make_divisible(num_inputs * n, divisible_by)
  return _expand
@slim.add_arg_scope
def expanded_conv(input_tensor,
                  num_outputs,
                  expansion_size=expand_input_by_factor(6),
                  stride=1,
                  rate=1,
                  kernel_size=(3, 3),
                  residual=True,
                  normalizer_fn=None,
                  split_projection=1,
                  split_expansion=1,
                  expansion_transform=None,
                  depthwise_location='expansion',
                  depthwise_channel_multiplier=1,
                  endpoints=None,
                  use_explicit_padding=False,
                  padding='SAME',
                  scope=None):
  """Depthwise Convolution Block with expansion.

  Builds a composite convolution that has the following structure
  expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)

  Args:
    input_tensor: input
    num_outputs: number of outputs in the final layer.
    expansion_size: the size of expansion, could be a constant or a callable.
      If latter it will be provided 'num_inputs' as an input. For forward
      compatibility it should accept arbitrary keyword arguments.
      Default will expand the input by factor of 6.
    stride: depthwise stride
    rate: depthwise rate
    kernel_size: depthwise kernel
    residual: whether to include residual connection between input
      and output.
    normalizer_fn: batchnorm or otherwise
    split_projection: how many ways to split projection operator
      (that is conv expansion->bottleneck)
    split_expansion: how many ways to split expansion op
      (that is conv bottleneck->expansion) ops will keep depth divisible
      by this value.
    expansion_transform: Optional function that takes expansion
      as a single input and returns output.
    depthwise_location: where to put depthwise convolutions supported
      values None, 'input', 'output', 'expansion'
    depthwise_channel_multiplier: depthwise channel multiplier:
      each input will replicated (with different filters)
      that many times. So if input had c channels,
      output will have c x depthwise_channel_multiplier.
    endpoints: An optional dictionary into which intermediate endpoints are
      placed. The keys "expansion_output", "depthwise_output",
      "projection_output" and "expansion_transform" are always populated, even
      if the corresponding functions are not invoked.
    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
      inputs so that the output dimensions are the same as if 'SAME' padding
      were used.
    padding: Padding type to use if `use_explicit_padding` is not set.
    scope: optional scope.

  Returns:
    Tensor of depth num_outputs

  Raises:
    TypeError: on invalid arguments (unknown depthwise_location, or
      use_explicit_padding combined with non-'SAME' padding).
  """
  with tf.variable_scope(scope, default_name='expanded_conv') as s, \
       tf.name_scope(s.original_name_scope):
    prev_depth = input_tensor.get_shape().as_list()[3]
    if depthwise_location not in [None, 'input', 'output', 'expansion']:
      raise TypeError('%r is unknown value for depthwise_location' %
                      depthwise_location)
    if use_explicit_padding:
      if padding != 'SAME':
        raise TypeError('`use_explicit_padding` should only be used with '
                        '"SAME" padding.')
      padding = 'VALID'
    # Shared config for whichever of the three positions gets the depthwise.
    depthwise_func = functools.partial(
        slim.separable_conv2d,
        num_outputs=None,
        kernel_size=kernel_size,
        depth_multiplier=depthwise_channel_multiplier,
        stride=stride,
        rate=rate,
        normalizer_fn=normalizer_fn,
        padding=padding,
        scope='depthwise')
    # b1 -> b2 * r -> b2
    # i -> (o * r) (bottleneck) -> o
    input_tensor = tf.identity(input_tensor, 'input')
    net = input_tensor
    if depthwise_location == 'input':
      if use_explicit_padding:
        net = _fixed_padding(net, kernel_size, rate)
      net = depthwise_func(net, activation_fn=None)
    if callable(expansion_size):
      inner_size = expansion_size(num_inputs=prev_depth)
    else:
      inner_size = expansion_size
    # Only expand when the target expansion depth exceeds the current depth.
    if inner_size > net.shape[3]:
      net = split_conv(
          net,
          inner_size,
          num_ways=split_expansion,
          scope='expand',
          stride=1,
          normalizer_fn=normalizer_fn)
      net = tf.identity(net, 'expansion_output')
    if endpoints is not None:
      endpoints['expansion_output'] = net
    if depthwise_location == 'expansion':
      if use_explicit_padding:
        net = _fixed_padding(net, kernel_size, rate)
      net = depthwise_func(net)
    net = tf.identity(net, name='depthwise_output')
    if endpoints is not None:
      endpoints['depthwise_output'] = net
    if expansion_transform:
      net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor)
    # Note in contrast with expansion, we always have
    # projection to produce the desired output size.
    net = split_conv(
        net,
        num_outputs,
        num_ways=split_projection,
        stride=1,
        scope='project',
        normalizer_fn=normalizer_fn,
        activation_fn=tf.identity)
    if endpoints is not None:
      endpoints['projection_output'] = net
    if depthwise_location == 'output':
      if use_explicit_padding:
        net = _fixed_padding(net, kernel_size, rate)
      net = depthwise_func(net, activation_fn=None)
    if callable(residual):  # custom residual
      net = residual(input_tensor=input_tensor, output_tensor=net)
    elif (residual and
          # stride check enforces that we don't add residuals when spatial
          # dimensions are None
          stride == 1 and
          # Depth matches
          net.get_shape().as_list()[3] ==
          input_tensor.get_shape().as_list()[3]):
      net += input_tensor
    return tf.identity(net, name='output')
def split_conv(input_tensor,
               num_outputs,
               num_ways,
               scope,
               divisible_by=8,
               **kwargs):
  """Builds a grouped (split) 1x1 convolution.

  The input channels and the output filters are each partitioned into
  `num_ways` groups of approximately equal size, and the i-th input group
  is connected only to the i-th output group. When any group would end up
  with fewer than `divisible_by` channels, a plain dense 1x1 convolution
  is built instead.

  Args:
    input_tensor: input tensor.
    num_outputs: total number of output filters.
    num_ways: number of groups to split input/output channels into.
    scope: scope for all the operators.
    divisible_by: every group size is kept divisible by this value.
    **kwargs: forwarded directly into the conv2d operator.

  Returns:
    The output tensor of the (possibly split) convolution.
  """
  in_channels = input_tensor.get_shape().as_list()[3]
  smallest_group = min(in_channels // num_ways, num_outputs // num_ways)
  if num_ways == 1 or smallest_group < divisible_by:
    # Splitting would produce groups thinner than `divisible_by` filters on
    # one of the sides, so fall back to an ordinary convolution.
    return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs)
  input_splits = _split_divisible(
      in_channels, num_ways, divisible_by=divisible_by)
  output_splits = _split_divisible(
      num_outputs, num_ways, divisible_by=divisible_by)
  branches = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope)
  base = scope
  outs = []
  for i, (branch, out_size) in enumerate(zip(branches, output_splits)):
    scope = base + '_part_%d' % (i,)
    part = slim.conv2d(branch, out_size, [1, 1], scope=scope, **kwargs)
    outs.append(tf.identity(part, scope + '_output'))
  # Note: `scope` still holds the last part's name here, matching the
  # original naming of the concat op.
  return tf.concat(outs, 3, name=scope + '_concat')
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for mobilenet_v2, branched from slim for fp16 performance study."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import tensorflow.compat.v1 as tf
from models.tf1_only import mobilenet
from models.tf1_only import mobilenet_conv_blocks as ops
from models.tf1_only import mobilenet_v2
from tensorflow.contrib import slim
def find_ops(optype):
  """Returns all operations of the given type in the default graph.

  Args:
    optype: operation type string (e.g. 'Conv2D').

  Returns:
    List of matching tf.Operation objects.
  """
  graph = tf.get_default_graph()
  return [operation for operation in graph.get_operations()
          if operation.type == optype]
class MobilenetV2Test(tf.test.TestCase):
  """Graph-construction tests for the branched MobilenetV2 model."""
  def setUp(self):  # pylint: disable=g-missing-super-call
    # Every test builds its own graph from scratch.
    tf.reset_default_graph()
  def testCreation(self):
    """Checks conv-op count and exposed depthwise endpoints."""
    spec = dict(mobilenet_v2.V2_DEF)
    _, ep = mobilenet.mobilenet(
        tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec)
    num_convs = len(find_ops('Conv2D'))
    # This is mostly a sanity test. No deep reason for these particular
    # constants.
    #
    # All but first 2 and last one have two convolutions, and there is one
    # extra conv that is not in the spec. (logits)
    self.assertEqual(num_convs, len(spec['spec']) * 2 - 2)
    # Check that depthwise are exposed.
    for i in range(2, 17):
      self.assertIn('layer_%d/depthwise_output' % i, ep)
  def testCreationNoClasses(self):
    """With num_classes=None the net itself is the global-pool endpoint."""
    spec = copy.deepcopy(mobilenet_v2.V2_DEF)
    net, ep = mobilenet.mobilenet(
        tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec,
        num_classes=None)
    self.assertIs(net, ep['global_pool'])
  def testImageSizes(self):
    # Spatial size of the final layer should be input_size / 32.
    for input_size, output_size in [(224, 7), (192, 6), (160, 5),
                                    (128, 4), (96, 3)]:
      tf.reset_default_graph()
      _, ep = mobilenet_v2.mobilenet(
          tf.placeholder(tf.float32, (10, input_size, input_size, 3)))
      self.assertEqual(ep['layer_18/output'].get_shape().as_list()[1:3],
                       [output_size] * 2)
  def testWithSplits(self):
    """Checks conv-op count when expansions are built as split convs."""
    spec = copy.deepcopy(mobilenet_v2.V2_DEF)
    spec['overrides'] = {
        (ops.expanded_conv,): dict(split_expansion=2),
    }
    _, _ = mobilenet.mobilenet(
        tf.placeholder(tf.float32, (10, 224, 224, 16)), conv_defs=spec)
    num_convs = len(find_ops('Conv2D'))
    # All but 3 ops have 3 conv operators, the remaining 3 have one,
    # and there is one unaccounted for.
    self.assertEqual(num_convs, len(spec['spec']) * 3 - 5)
  def testWithOutputStride8(self):
    out, _ = mobilenet.mobilenet_base(
        tf.placeholder(tf.float32, (10, 224, 224, 16)),
        conv_defs=mobilenet_v2.V2_DEF,
        output_stride=8,
        scope='MobilenetV2')
    self.assertEqual(out.get_shape().as_list()[1:3], [28, 28])
  def testDivisibleBy(self):
    # All channel counts must be multiples of 16 (and at least 32).
    tf.reset_default_graph()
    mobilenet_v2.mobilenet(
        tf.placeholder(tf.float32, (10, 224, 224, 16)),
        conv_defs=mobilenet_v2.V2_DEF,
        divisible_by=16,
        min_depth=32)
    s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
    s = set(s)
    self.assertSameElements([32, 64, 96, 160, 192, 320, 384, 576, 960, 1280,
                             1001], s)
  def testDivisibleByWithArgScope(self):
    tf.reset_default_graph()
    # Verifies that depth_multiplier arg scope actually works
    # if no default min_depth is provided.
    with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
      mobilenet_v2.mobilenet(
          tf.placeholder(tf.float32, (10, 224, 224, 2)),
          conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1)
      s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
      s = set(s)
      self.assertSameElements(s, [32, 192, 128, 1001])
  def testFineGrained(self):
    tf.reset_default_graph()
    # Verifies that depth_multiplier arg scope actually works
    # if no default min_depth is provided.
    mobilenet_v2.mobilenet(
        tf.placeholder(tf.float32, (10, 224, 224, 2)),
        conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.01,
        finegrain_classification_mode=True)
    s = [op.outputs[0].get_shape().as_list()[-1] for op in find_ops('Conv2D')]
    s = set(s)
    # All convolutions will be 8->48, except for the last one.
    self.assertSameElements(s, [8, 48, 1001, 1280])
  def testMobilenetBase(self):
    tf.reset_default_graph()
    # Verifies that mobilenet_base returns pre-pooling layer.
    with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
      net, _ = mobilenet_v2.mobilenet_base(
          tf.placeholder(tf.float32, (10, 224, 224, 16)),
          conv_defs=mobilenet_v2.V2_DEF, depth_multiplier=0.1)
      self.assertEqual(net.get_shape().as_list(), [10, 7, 7, 128])
  def testWithOutputStride16(self):
    tf.reset_default_graph()
    out, _ = mobilenet.mobilenet_base(
        tf.placeholder(tf.float32, (10, 224, 224, 16)),
        conv_defs=mobilenet_v2.V2_DEF,
        output_stride=16)
    self.assertEqual(out.get_shape().as_list()[1:3], [14, 14])
  def testWithOutputStride8AndExplicitPadding(self):
    tf.reset_default_graph()
    out, _ = mobilenet.mobilenet_base(
        tf.placeholder(tf.float32, (10, 224, 224, 16)),
        conv_defs=mobilenet_v2.V2_DEF,
        output_stride=8,
        use_explicit_padding=True,
        scope='MobilenetV2')
    self.assertEqual(out.get_shape().as_list()[1:3], [28, 28])
  def testWithOutputStride16AndExplicitPadding(self):
    tf.reset_default_graph()
    out, _ = mobilenet.mobilenet_base(
        tf.placeholder(tf.float32, (10, 224, 224, 16)),
        conv_defs=mobilenet_v2.V2_DEF,
        output_stride=16,
        use_explicit_padding=True)
    self.assertEqual(out.get_shape().as_list()[1:3], [14, 14])
  def testBatchNormScopeDoesNotHaveIsTrainingWhenItsSetToNone(self):
    sc = mobilenet.training_scope(is_training=None)
    self.assertNotIn('is_training', sc[slim.arg_scope_func_key(
        slim.batch_norm)])
  def testBatchNormScopeDoesHasIsTrainingWhenItsNotNone(self):
    sc = mobilenet.training_scope(is_training=False)
    self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
    sc = mobilenet.training_scope(is_training=True)
    self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
    sc = mobilenet.training_scope()
    self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
if __name__ == '__main__':
  # These tests exercise TF1-style graph APIs, so disable v2 behavior first.
  tf.disable_v2_behavior()
  tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mobilenet V2 model, branched from slim models for fp16 performance study.
Architecture: https://arxiv.org/abs/1801.04381
The base model gives 72.2% accuracy on ImageNet, with 300MMadds,
3.4 M parameters.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import tensorflow.compat.v1 as tf
from models import model
from models.tf1_only import mobilenet as lib
from models.tf1_only import mobilenet_conv_blocks as ops
from tensorflow.contrib import slim
# Alias for the layer-declaration wrapper used throughout V2_DEF below.
op = lib.op
# Alias: expansion size expressed as a multiple of the block's input depth.
expand_input = ops.expand_input_by_factor
# pyformat: disable
# Architecture: https://arxiv.org/abs/1801.04381
# Default MobilenetV2 architecture definition: per-op-type defaults plus the
# ordered list of layers ('spec') consumed by lib.mobilenet.
V2_DEF = dict(
    defaults={
        # Note: these parameters of batch norm affect the architecture
        # that's why they are here and not in training_scope.
        (slim.batch_norm,): {'center': True, 'scale': True},
        (slim.conv2d, slim.fully_connected, slim.separable_conv2d): {
            'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6
        },
        (ops.expanded_conv,): {
            'expansion_size': expand_input(6),
            'split_expansion': 1,
            'normalizer_fn': slim.batch_norm,
            'residual': True
        },
        (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'}
    },
    spec=[
        op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]),
        op(ops.expanded_conv,
           expansion_size=expand_input(1, divisible_by=1),
           num_outputs=16),
        op(ops.expanded_conv, stride=2, num_outputs=24),
        op(ops.expanded_conv, stride=1, num_outputs=24),
        op(ops.expanded_conv, stride=2, num_outputs=32),
        op(ops.expanded_conv, stride=1, num_outputs=32),
        op(ops.expanded_conv, stride=1, num_outputs=32),
        op(ops.expanded_conv, stride=2, num_outputs=64),
        op(ops.expanded_conv, stride=1, num_outputs=64),
        op(ops.expanded_conv, stride=1, num_outputs=64),
        op(ops.expanded_conv, stride=1, num_outputs=64),
        op(ops.expanded_conv, stride=1, num_outputs=96),
        op(ops.expanded_conv, stride=1, num_outputs=96),
        op(ops.expanded_conv, stride=1, num_outputs=96),
        op(ops.expanded_conv, stride=2, num_outputs=160),
        op(ops.expanded_conv, stride=1, num_outputs=160),
        op(ops.expanded_conv, stride=1, num_outputs=160),
        op(ops.expanded_conv, stride=1, num_outputs=320),
        op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280)
    ],
)
# pyformat: enable
@slim.add_arg_scope
def mobilenet(input_tensor,
              num_classes=1001,
              depth_multiplier=1.0,
              scope='MobilenetV2',
              conv_defs=None,
              finegrain_classification_mode=False,
              min_depth=None,
              divisible_by=None,
              **kwargs):
  """Creates mobilenet V2 network.

  Inference mode is created by default. To create training use training_scope
  below.

    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
      logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

  Args:
    input_tensor: The input tensor
    num_classes: number of classes
    depth_multiplier: The multiplier applied to scale number of
      channels in each layer. Note: this is called depth multiplier in the
      paper but the name is kept for consistency with slim's model builder.
    scope: Scope of the operator
    conv_defs: Allows to override default conv def.
    finegrain_classification_mode: When set to True, the model
      will keep the last layer large even for small multipliers. Following
      https://arxiv.org/abs/1801.04381
      suggests that it improves performance for ImageNet-type of problems.
      *Note* ignored if final_endpoint makes the builder exit earlier.
    min_depth: If provided, will ensure that all layers will have that
      many channels after application of depth multiplier.
    divisible_by: If provided will ensure that all layers' number of channels
      will be divisible by this number.
    **kwargs: passed directly to mobilenet.mobilenet:
      prediction_fn: what prediction function to use.
      reuse: whether to reuse variables (if reuse set to true, scope
        must be given).

  Returns:
    logits/endpoints pair

  Raises:
    ValueError: On invalid arguments
  """
  if conv_defs is None:
    conv_defs = V2_DEF
  if 'multiplier' in kwargs:
    raise ValueError('mobilenetv2 doesn\'t support generic '
                     'multiplier parameter use "depth_multiplier" instead.')
  if finegrain_classification_mode:
    # Deep-copy so the module-level V2_DEF (or the caller's spec) is not
    # mutated; enlarge the final layer so that applying depth_multiplier
    # later restores at least the original width.
    conv_defs = copy.deepcopy(conv_defs)
    if depth_multiplier < 1:
      # NOTE(review): true division yields a float num_outputs here;
      # presumably lib's depth_multiplier rounds it back to an int —
      # confirm against models.tf1_only.mobilenet.
      conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier
  depth_args = {}
  # NB: do not set depth_args unless they are provided to avoid overriding
  # whatever default depth_multiplier might have thanks to arg_scope.
  if min_depth is not None:
    depth_args['min_depth'] = min_depth
  if divisible_by is not None:
    depth_args['divisible_by'] = divisible_by
  with slim.arg_scope((lib.depth_multiplier,), **depth_args):
    return lib.mobilenet(
        input_tensor,
        num_classes=num_classes,
        conv_defs=conv_defs,
        scope=scope,
        multiplier=depth_multiplier,
        **kwargs)
@slim.add_arg_scope
def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs):
  """Builds only the MobilenetV2 feature extractor: no pooling, no logits.

  Equivalent to mobilenet(..., base_only=True); all remaining keyword
  arguments are forwarded unchanged.
  """
  return mobilenet(input_tensor,
                   depth_multiplier=depth_multiplier,
                   base_only=True,
                   **kwargs)
def training_scope(**kwargs):
  """Defines MobilenetV2 training scope.

  Usage:
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
      logits, endpoints = mobilenet_v2.mobilenet(input_tensor)

  Args:
    **kwargs: Passed to mobilenet.training_scope. The following parameters
      are supported:
        weight_decay: The weight decay to use for regularizing the model.
        stddev: Standard deviation for initialization, if negative uses xavier.
        dropout_keep_prob: dropout keep probability.
        bn_decay: decay for the batch norm moving averages.

  Returns:
    An `arg_scope` to use for the mobilenet v2 model.
  """
  return lib.training_scope(**kwargs)
class MobilenetModel(model.CNNModel):
  """Mobilenet model configuration."""
  def __init__(self, params=None):
    # Image size 224, batch size 32, learning rate 0.005.
    super(MobilenetModel, self).__init__(
        'mobilenet', 224, 32, 0.005, params=params)
  def add_inference(self, cnn):
    """Adds the MobilenetV2 tower on top of cnn's current top layer."""
    with slim.arg_scope(training_scope(is_training=cnn.phase_train)):
      cnn.top_layer, _ = mobilenet(cnn.top_layer, is_training=cnn.phase_train)
      cnn.top_size = cnn.top_layer.shape[-1].value
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Model configurations for nasnet.
Paper: https://arxiv.org/abs/1707.07012
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
from models import model
from models.tf1_only import nasnet_utils
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib import slim
from tensorflow.contrib import training as contrib_training
# Short alias so the nested arg_scope calls below stay readable.
arg_scope = contrib_framework.arg_scope
# Notes for training NASNet Cifar Model
# -------------------------------------
# batch_size: 32
# learning rate: 0.025
# cosine (single period) learning rate decay
# auxiliary head loss weighting: 0.4
# clip global norm of all gradients by 5
def _cifar_config(is_training=True, data_format=None, total_steps=None):
  """Returns hyperparameters for the NASNet-A Cifar model."""
  # Drop-path regularization is only active while training.
  keep_prob = 0.6 if is_training else 1.0
  config = dict(
      stem_multiplier=3.0,
      drop_path_keep_prob=keep_prob,
      num_cells=18,
      use_aux_head=1,
      num_conv_filters=32,
      dense_dropout_keep_prob=1.0,
      filter_scaling_rate=2.0,
      num_reduction_layers=2,
      skip_reduction_layer_input=0,
      data_format=data_format or 'NHWC',
      # 600 epochs with a batch size of 32.
      # This is used for the drop path probabilities since it needs to increase
      # the drop out probability over the course of training.
      total_training_steps=total_steps or 937500,
  )
  return contrib_training.HParams(**config)
# Notes for training large NASNet model on ImageNet
# -------------------------------------
# batch size (per replica): 16
# learning rate: 0.015 * 100
# learning rate decay factor: 0.97
# num epochs per decay: 2.4
# sync sgd with 100 replicas
# auxiliary head loss weighting: 0.4
# label smoothing: 0.1
# clip global norm of all gradients by 10
def _large_imagenet_config(is_training=True, data_format=None,
                           total_steps=None):
  """Returns hyperparameters for the large NASNet-A ImageNet model."""
  # Drop-path regularization is only active while training.
  keep_prob = 0.7 if is_training else 1.0
  config = dict(
      stem_multiplier=3.0,
      dense_dropout_keep_prob=0.5,
      num_cells=18,
      filter_scaling_rate=2.0,
      num_conv_filters=168,
      drop_path_keep_prob=keep_prob,
      use_aux_head=1,
      num_reduction_layers=2,
      skip_reduction_layer_input=1,
      data_format=data_format or 'NHWC',
      total_training_steps=total_steps or 250000,
  )
  return contrib_training.HParams(**config)
# Notes for training the mobile NASNet ImageNet model
# -------------------------------------
# batch size (per replica): 32
# learning rate: 0.04 * 50
# learning rate scaling factor: 0.97
# num epochs per decay: 2.4
# sync sgd with 50 replicas
# auxiliary head weighting: 0.4
# label smoothing: 0.1
# clip global norm of all gradients by 10
def _mobile_imagenet_config(data_format=None, total_steps=None):
  """Returns hyperparameters for the mobile NASNet-A ImageNet model."""
  config = dict(
      stem_multiplier=1.0,
      dense_dropout_keep_prob=0.5,
      num_cells=12,
      filter_scaling_rate=2.0,
      drop_path_keep_prob=1.0,
      num_conv_filters=44,
      use_aux_head=1,
      num_reduction_layers=2,
      skip_reduction_layer_input=0,
      data_format=data_format or 'NHWC',
      total_training_steps=total_steps or 250000,
  )
  return contrib_training.HParams(**config)
def nasnet_cifar_arg_scope(weight_decay=5e-4,
                           batch_norm_decay=0.9,
                           batch_norm_epsilon=1e-5):
  """Defines the default arg scope for the NASNet-A Cifar model.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.

  Returns:
    An `arg_scope` to use for the NASNet Cifar Model.
  """
  batch_norm_params = {
      # Decay for the moving averages.
      'decay': batch_norm_decay,
      # epsilon to prevent 0s in variance.
      'epsilon': batch_norm_epsilon,
      'scale': True,
      'fused': True,
  }
  weights_regularizer = contrib_layers.l2_regularizer(weight_decay)
  weights_initializer = contrib_layers.variance_scaling_initializer(
      mode='FAN_OUT')
  # Nested scopes: regularizer/initializer for all weight-bearing ops,
  # per-op overrides next, and the shared batch-norm parameters innermost.
  with arg_scope(
      [slim.fully_connected, slim.conv2d, slim.separable_conv2d],
      weights_regularizer=weights_regularizer,
      weights_initializer=weights_initializer):
    with arg_scope([slim.fully_connected], activation_fn=None, scope='FC'):
      with arg_scope(
          [slim.conv2d, slim.separable_conv2d],
          activation_fn=None,
          biases_initializer=None):
        with arg_scope([slim.batch_norm], **batch_norm_params) as sc:
          return sc
def nasnet_mobile_arg_scope(weight_decay=4e-5,
                            batch_norm_decay=0.9997,
                            batch_norm_epsilon=1e-3):
  """Defines the default arg scope for the NASNet-A Mobile ImageNet model.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.

  Returns:
    An `arg_scope` to use for the NASNet Mobile Model.
  """
  batch_norm_params = {
      # Decay for the moving averages.
      'decay': batch_norm_decay,
      # epsilon to prevent 0s in variance.
      'epsilon': batch_norm_epsilon,
      'scale': True,
      'fused': True,
  }
  weights_regularizer = contrib_layers.l2_regularizer(weight_decay)
  weights_initializer = contrib_layers.variance_scaling_initializer(
      mode='FAN_OUT')
  # Nested scopes: regularizer/initializer for all weight-bearing ops,
  # per-op overrides next, and the shared batch-norm parameters innermost.
  with arg_scope(
      [slim.fully_connected, slim.conv2d, slim.separable_conv2d],
      weights_regularizer=weights_regularizer,
      weights_initializer=weights_initializer):
    with arg_scope([slim.fully_connected], activation_fn=None, scope='FC'):
      with arg_scope(
          [slim.conv2d, slim.separable_conv2d],
          activation_fn=None,
          biases_initializer=None):
        with arg_scope([slim.batch_norm], **batch_norm_params) as sc:
          return sc
def nasnet_large_arg_scope(weight_decay=5e-5,
                           batch_norm_decay=0.9997,
                           batch_norm_epsilon=1e-3):
  """Defines the default arg scope for the NASNet-A Large ImageNet model.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.

  Returns:
    An `arg_scope` to use for the NASNet Large Model.
  """
  batch_norm_params = {
      # Decay for the moving averages.
      'decay': batch_norm_decay,
      # epsilon to prevent 0s in variance.
      'epsilon': batch_norm_epsilon,
      'scale': True,
      'fused': True,
  }
  weights_regularizer = contrib_layers.l2_regularizer(weight_decay)
  weights_initializer = contrib_layers.variance_scaling_initializer(
      mode='FAN_OUT')
  # Nested scopes: regularizer/initializer for all weight-bearing ops,
  # per-op overrides next, and the shared batch-norm parameters innermost.
  with arg_scope(
      [slim.fully_connected, slim.conv2d, slim.separable_conv2d],
      weights_regularizer=weights_regularizer,
      weights_initializer=weights_initializer):
    with arg_scope([slim.fully_connected], activation_fn=None, scope='FC'):
      with arg_scope(
          [slim.conv2d, slim.separable_conv2d],
          activation_fn=None,
          biases_initializer=None):
        with arg_scope([slim.batch_norm], **batch_norm_params) as sc:
          return sc
def _build_aux_head(net, end_points, num_classes, hparams, scope):
  """Auxiliary head used for all models across all datasets.

  Builds a small classifier on an intermediate feature map and stores its
  logits in end_points['AuxLogits']; nothing is returned.
  """
  with tf.variable_scope(scope):
    aux_logits = tf.identity(net)
    with tf.variable_scope('aux_logits'):
      aux_logits = slim.avg_pool2d(
          aux_logits, [5, 5], stride=3, padding='VALID')
      aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='proj')
      aux_logits = slim.batch_norm(aux_logits, scope='aux_bn0')
      aux_logits = tf.nn.relu(aux_logits)
      # Shape of feature map before the final layer.
      shape = aux_logits.shape
      if hparams.data_format == 'NHWC':
        shape = shape[1:3]
      else:
        shape = shape[2:4]
      # A VALID conv whose kernel covers the whole spatial extent collapses
      # the feature map to 1x1.
      aux_logits = slim.conv2d(aux_logits, 768, shape, padding='VALID')
      aux_logits = slim.batch_norm(aux_logits, scope='aux_bn1')
      aux_logits = tf.nn.relu(aux_logits)
      aux_logits = contrib_layers.flatten(aux_logits)
      aux_logits = slim.fully_connected(aux_logits, num_classes)
      end_points['AuxLogits'] = aux_logits
def _imagenet_stem(inputs, hparams, stem_cell):
  """Stem used for models trained on ImageNet.

  Returns:
    (net, cell_outputs) where cell_outputs collects per-cell feature maps,
    prefixed with None so cell_outputs[-2] is valid for the first cell.
  """
  num_stem_cells = 2
  # 149 x 149 x 32
  num_stem_filters = int(32 * hparams.stem_multiplier)
  net = slim.conv2d(
      inputs,
      num_stem_filters, [3, 3],
      stride=2,
      scope='conv0',
      padding='VALID')
  net = slim.batch_norm(net, scope='conv0_bn')
  # Run the reduction cells
  cell_outputs = [None, net]
  # Start below 1.0 so that after num_stem_cells multiplications the main
  # body of the network begins at filter_scaling == 1.0.
  filter_scaling = 1.0 / (hparams.filter_scaling_rate**num_stem_cells)
  for cell_num in range(num_stem_cells):
    net = stem_cell(
        net,
        scope='cell_stem_{}'.format(cell_num),
        filter_scaling=filter_scaling,
        stride=2,
        prev_layer=cell_outputs[-2],
        cell_num=cell_num)
    cell_outputs.append(net)
    filter_scaling *= hparams.filter_scaling_rate
  return net, cell_outputs
def _cifar_stem(inputs, hparams):
  """Stem used for models trained on Cifar.

  Returns:
    (net, cell_outputs) where cell_outputs is seeded with [None, net] so
    that cell_outputs[-2] is valid for the first cell of the main body.
  """
  stem_depth = int(hparams.num_conv_filters * hparams.stem_multiplier)
  stem_output = slim.batch_norm(
      slim.conv2d(inputs, stem_depth, 3, scope='l1_stem_3x3'),
      scope='l1_stem_bn')
  return stem_output, [None, stem_output]
def build_nasnet_cifar(images,
                       num_classes=None,
                       is_training=True,
                       data_format=None,
                       total_steps=None):
  """Build NASNet model for the Cifar Dataset.

  Args:
    images: input image batch.
    num_classes: number of classes, or None for a feature extractor.
    is_training: whether dropout/drop-path and the auxiliary head are active.
    data_format: 'NHWC' or 'NCHW'; defaults to 'NHWC'.
    total_steps: total training steps, used by the drop-path schedule.

  Returns:
    (logits, end_points) pair from _build_nasnet_base.
  """
  hparams = _cifar_config(
      is_training=is_training, data_format=data_format, total_steps=total_steps)
  if tf.test.is_gpu_available() and hparams.data_format == 'NHWC':
    tf.logging.info('A GPU is available on the machine, consider using NCHW '
                    'data format for increased speed on GPU.')
  # Calculate the total number of cells in the network
  # Add 2 for the reduction cells
  total_num_cells = hparams.num_cells + 2
  normal_cell = nasnet_utils.NasNetANormalCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob, total_num_cells,
      hparams.total_training_steps)
  reduction_cell = nasnet_utils.NasNetAReductionCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob, total_num_cells,
      hparams.total_training_steps)
  # Outer scope toggles train-time behavior; inner scope pins data_format
  # for every spatial op.
  with arg_scope(
      [slim.dropout, nasnet_utils.drop_path, slim.batch_norm],
      is_training=is_training):
    with arg_scope(
        [
            slim.avg_pool2d, slim.max_pool2d, slim.conv2d, slim.batch_norm,
            slim.separable_conv2d, nasnet_utils.factorized_reduction,
            nasnet_utils.global_avg_pool, nasnet_utils.get_channel_index,
            nasnet_utils.get_channel_dim
        ],
        data_format=hparams.data_format):
      return _build_nasnet_base(
          images,
          normal_cell=normal_cell,
          reduction_cell=reduction_cell,
          num_classes=num_classes,
          hparams=hparams,
          is_training=is_training,
          stem_type='cifar')
build_nasnet_cifar.default_image_size = 32
def build_nasnet_mobile(images,
                        num_classes=None,
                        is_training=True,
                        data_format=None,
                        total_steps=None,
                        final_endpoint=None):
  """Build NASNet Mobile model for the ImageNet Dataset.

  Args:
    images: input image batch.
    num_classes: number of classes, or None for a feature extractor.
    is_training: whether dropout/drop-path and the auxiliary head are active.
    data_format: 'NHWC' or 'NCHW'; defaults to 'NHWC'.
    total_steps: total training steps, used by the drop-path schedule.
    final_endpoint: if set, graph construction stops at this endpoint.

  Returns:
    (logits, end_points) pair from _build_nasnet_base.
  """
  hparams = _mobile_imagenet_config(
      data_format=data_format, total_steps=total_steps)
  if tf.test.is_gpu_available() and hparams.data_format == 'NHWC':
    tf.logging.info('A GPU is available on the machine, consider using NCHW '
                    'data format for increased speed on GPU.')
  # Calculate the total number of cells in the network
  # Add 2 for the reduction cells
  total_num_cells = hparams.num_cells + 2
  # If ImageNet, then add an additional two for the stem cells
  total_num_cells += 2
  normal_cell = nasnet_utils.NasNetANormalCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob, total_num_cells,
      hparams.total_training_steps)
  reduction_cell = nasnet_utils.NasNetAReductionCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob, total_num_cells,
      hparams.total_training_steps)
  # Outer scope toggles train-time behavior; inner scope pins data_format
  # for every spatial op.
  with arg_scope(
      [slim.dropout, nasnet_utils.drop_path, slim.batch_norm],
      is_training=is_training):
    with arg_scope(
        [
            slim.avg_pool2d, slim.max_pool2d, slim.conv2d, slim.batch_norm,
            slim.separable_conv2d, nasnet_utils.factorized_reduction,
            nasnet_utils.global_avg_pool, nasnet_utils.get_channel_index,
            nasnet_utils.get_channel_dim
        ],
        data_format=hparams.data_format):
      return _build_nasnet_base(
          images,
          normal_cell=normal_cell,
          reduction_cell=reduction_cell,
          num_classes=num_classes,
          hparams=hparams,
          is_training=is_training,
          stem_type='imagenet',
          final_endpoint=final_endpoint)
build_nasnet_mobile.default_image_size = 224
def build_nasnet_large(images,
                       num_classes=None,
                       is_training=True,
                       data_format=None,
                       total_steps=None,
                       final_endpoint=None):
  """Build NASNet Large model for the ImageNet Dataset.

  Args:
    images: input image batch.
    num_classes: number of classes, or None for a feature extractor.
    is_training: whether dropout/drop-path and the auxiliary head are active.
    data_format: 'NHWC' or 'NCHW'; defaults to 'NHWC'.
    total_steps: total training steps, used by the drop-path schedule.
    final_endpoint: if set, graph construction stops at this endpoint.

  Returns:
    (logits, end_points) pair from _build_nasnet_base.
  """
  hparams = _large_imagenet_config(
      is_training=is_training, data_format=data_format, total_steps=total_steps)
  if tf.test.is_gpu_available() and hparams.data_format == 'NHWC':
    tf.logging.info('A GPU is available on the machine, consider using NCHW '
                    'data format for increased speed on GPU.')
  # Calculate the total number of cells in the network
  # Add 2 for the reduction cells
  total_num_cells = hparams.num_cells + 2
  # If ImageNet, then add an additional two for the stem cells
  total_num_cells += 2
  normal_cell = nasnet_utils.NasNetANormalCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob, total_num_cells,
      hparams.total_training_steps)
  reduction_cell = nasnet_utils.NasNetAReductionCell(
      hparams.num_conv_filters, hparams.drop_path_keep_prob, total_num_cells,
      hparams.total_training_steps)
  # Outer scope toggles train-time behavior; inner scope pins data_format
  # for every spatial op.
  with arg_scope(
      [slim.dropout, nasnet_utils.drop_path, slim.batch_norm],
      is_training=is_training):
    with arg_scope(
        [
            slim.avg_pool2d, slim.max_pool2d, slim.conv2d, slim.batch_norm,
            slim.separable_conv2d, nasnet_utils.factorized_reduction,
            nasnet_utils.global_avg_pool, nasnet_utils.get_channel_index,
            nasnet_utils.get_channel_dim
        ],
        data_format=hparams.data_format):
      return _build_nasnet_base(
          images,
          normal_cell=normal_cell,
          reduction_cell=reduction_cell,
          num_classes=num_classes,
          hparams=hparams,
          is_training=is_training,
          stem_type='imagenet',
          final_endpoint=final_endpoint)
build_nasnet_large.default_image_size = 331
def _build_nasnet_base(images,
                       normal_cell,
                       reduction_cell,
                       num_classes,
                       hparams,
                       is_training,
                       stem_type,
                       final_endpoint=None):
  """Constructs a NASNet image model.

  Args:
    images: input image batch.
    normal_cell: callable that builds a normal NASNet cell.
    reduction_cell: callable that builds a reduction NASNet cell.
    num_classes: number of classes, or None for a feature extractor.
    hparams: HParams object describing the architecture.
    is_training: whether the auxiliary head should be built.
    stem_type: 'imagenet' or 'cifar'; selects the stem.
    final_endpoint: if set, construction stops once this endpoint is built.

  Returns:
    (logits, end_points); the first element may instead be a feature tensor
    when construction stops early or num_classes is None.

  Raises:
    ValueError: if `stem_type` is unknown.
  """
  end_points = {}
  def add_and_check_endpoint(endpoint_name, net):
    # Records the endpoint and reports whether construction should stop here.
    end_points[endpoint_name] = net
    return final_endpoint and (endpoint_name == final_endpoint)
  # Find where to place the reduction cells or stride normal cells
  reduction_indices = nasnet_utils.calc_reduction_layers(
      hparams.num_cells, hparams.num_reduction_layers)
  stem_cell = reduction_cell
  if stem_type == 'imagenet':
    stem = lambda: _imagenet_stem(images, hparams, stem_cell)
  elif stem_type == 'cifar':
    stem = lambda: _cifar_stem(images, hparams)
  else:
    raise ValueError('Unknown stem_type: ', stem_type)
  net, cell_outputs = stem()
  if add_and_check_endpoint('Stem', net):
    return net, end_points
  # Setup for building in the auxiliary head.
  aux_head_cell_idxes = []
  if len(reduction_indices) >= 2:
    aux_head_cell_idxes.append(reduction_indices[1] - 1)
  # Run the cells
  filter_scaling = 1.0
  # true_cell_num accounts for the stem cells
  true_cell_num = 2 if stem_type == 'imagenet' else 0
  for cell_num in range(hparams.num_cells):
    stride = 1
    if hparams.skip_reduction_layer_input:
      # When skipping, the normal cell sees the layer from *before* any
      # reduction cell inserted at this position.
      prev_layer = cell_outputs[-2]
    if cell_num in reduction_indices:
      filter_scaling *= hparams.filter_scaling_rate
      net = reduction_cell(
          net,
          scope='reduction_cell_{}'.format(reduction_indices.index(cell_num)),
          filter_scaling=filter_scaling,
          stride=2,
          prev_layer=cell_outputs[-2],
          cell_num=true_cell_num)
      if add_and_check_endpoint(
          'Reduction_Cell_{}'.format(reduction_indices.index(cell_num)), net):
        return net, end_points
      true_cell_num += 1
      cell_outputs.append(net)
    if not hparams.skip_reduction_layer_input:
      prev_layer = cell_outputs[-2]
    net = normal_cell(
        net,
        scope='cell_{}'.format(cell_num),
        filter_scaling=filter_scaling,
        stride=stride,
        prev_layer=prev_layer,
        cell_num=true_cell_num)
    if add_and_check_endpoint('Cell_{}'.format(cell_num), net):
      return net, end_points
    true_cell_num += 1
    if (hparams.use_aux_head and cell_num in aux_head_cell_idxes and
        num_classes and is_training):
      # Auxiliary classifier head (training only), attached just before the
      # second reduction layer.
      aux_net = tf.nn.relu(net)
      _build_aux_head(
          aux_net,
          end_points,
          num_classes,
          hparams,
          scope='aux_{}'.format(cell_num))
    cell_outputs.append(net)
  # Final softmax layer
  with tf.variable_scope('final_layer'):
    net = tf.nn.relu(net)
    net = nasnet_utils.global_avg_pool(net)
    if add_and_check_endpoint('global_pool', net) or num_classes is None:
      return net, end_points
    net = slim.dropout(net, hparams.dense_dropout_keep_prob, scope='dropout')
    logits = slim.fully_connected(net, num_classes)
    if add_and_check_endpoint('Logits', logits):
      return net, end_points
    predictions = tf.nn.softmax(logits, name='predictions')
    if add_and_check_endpoint('Predictions', predictions):
      return net, end_points
  return logits, end_points
class NasnetModel(model.CNNModel):
  """Nasnet (mobile, ImageNet) model configuration."""
  def __init__(self, params=None):
    # Image size 224, batch size 32, learning rate 0.005.
    super(NasnetModel, self).__init__('nasnet', 224, 32, 0.005, params=params)
  def add_inference(self, cnn):
    """Adds the NASNet-A Mobile tower on top of cnn's current top layer."""
    tf.logging.info('input_image_shape: {}'.format(cnn.top_layer.shape))
    cnn.top_layer, _ = build_nasnet_mobile(
        images=cnn.top_layer,
        is_training=cnn.phase_train,
        data_format=cnn.data_format)
    cnn.top_size = cnn.top_layer.shape[-1].value
class NasnetLargeModel(model.CNNModel):
  """Nasnet large (ImageNet) model configuration."""
  def __init__(self, params=None):
    # Image size 331, batch size 16, learning rate 0.005.
    super(NasnetLargeModel, self).__init__(
        'nasnet', 331, 16, 0.005, params=params)
  def add_inference(self, cnn):
    """Adds the NASNet-A Large tower on top of cnn's current top layer."""
    tf.logging.info('input_image_shape: {}'.format(cnn.top_layer.shape))
    cnn.top_layer, _ = build_nasnet_large(
        images=cnn.top_layer,
        is_training=cnn.phase_train,
        data_format=cnn.data_format)
    cnn.top_size = cnn.top_layer.shape[-1].value
class NasnetCifarModel(model.CNNModel):
  """Nasnet cifar model configuration."""
  def __init__(self, params=None):
    # Image size 32, batch size 32, learning rate 0.025.
    super(NasnetCifarModel, self).__init__(
        'nasnet', 32, 32, 0.025, params=params)
  def add_inference(self, cnn):
    """Adds the NASNet-A Cifar tower on top of cnn's current top layer."""
    tf.logging.info('input_image_shape: {}'.format(cnn.top_layer.shape))
    cnn.top_layer, _ = build_nasnet_cifar(
        images=cnn.top_layer,
        is_training=cnn.phase_train,
        data_format=cnn.data_format)
    cnn.top_size = cnn.top_layer.shape[-1].value
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nasnet."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
from models.tf1_only import nasnet_model as nasnet
from tensorflow.contrib import slim
class NASNetTest(tf.test.TestCase):
  """Graph-construction tests for the NASNet CIFAR/Mobile/Large variants.

  Each test builds the network in graph mode (tf.compat.v1) and checks static
  tensor shapes, endpoint names, or device placement; only the last two tests
  actually run a session.
  """

  def testBuildLogitsCifarModel(self):
    """Logits, aux logits and predictions are [batch_size, num_classes]."""
    batch_size = 5
    height, width = 32, 32
    num_classes = 10
    inputs = tf.random_uniform((batch_size, height, width, 3))
    # NASNet's drop-path schedule reads the global step during construction.
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
      logits, end_points = nasnet.build_nasnet_cifar(inputs, num_classes)
    auxlogits = end_points['AuxLogits']
    predictions = end_points['Predictions']
    self.assertListEqual(auxlogits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildLogitsMobileModel(self):
    """Same shape checks for the Mobile variant at 224x224."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
      logits, end_points = nasnet.build_nasnet_mobile(inputs, num_classes)
    auxlogits = end_points['AuxLogits']
    predictions = end_points['Predictions']
    self.assertListEqual(auxlogits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildLogitsLargeModel(self):
    """Same shape checks for the Large variant at 331x331."""
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
      logits, end_points = nasnet.build_nasnet_large(inputs, num_classes)
    auxlogits = end_points['AuxLogits']
    predictions = end_points['Predictions']
    self.assertListEqual(auxlogits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildPreLogitsCifarModel(self):
    """With num_classes=None the net stops at the global pool (768 features)."""
    batch_size = 5
    height, width = 32, 32
    num_classes = None
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
      net, end_points = nasnet.build_nasnet_cifar(inputs, num_classes)
    self.assertNotIn('AuxLogits', end_points)
    self.assertNotIn('Predictions', end_points)
    self.assertTrue(net.op.name.startswith('final_layer/Mean'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 768])

  def testBuildPreLogitsMobileModel(self):
    """Pre-logits output of the Mobile variant has 1056 features."""
    batch_size = 5
    height, width = 224, 224
    num_classes = None
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
      net, end_points = nasnet.build_nasnet_mobile(inputs, num_classes)
    self.assertNotIn('AuxLogits', end_points)
    self.assertNotIn('Predictions', end_points)
    self.assertTrue(net.op.name.startswith('final_layer/Mean'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 1056])

  def testBuildPreLogitsLargeModel(self):
    """Pre-logits output of the Large variant has 4032 features."""
    batch_size = 5
    height, width = 331, 331
    num_classes = None
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
      net, end_points = nasnet.build_nasnet_large(inputs, num_classes)
    self.assertNotIn('AuxLogits', end_points)
    self.assertNotIn('Predictions', end_points)
    self.assertTrue(net.op.name.startswith('final_layer/Mean'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 4032])

  def testAllEndPointsShapesCifarModel(self):
    """Every endpoint of the CIFAR net has the expected static shape."""
    batch_size = 5
    height, width = 32, 32
    num_classes = 10
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_cifar_arg_scope()):
      _, end_points = nasnet.build_nasnet_cifar(inputs, num_classes)
    endpoints_shapes = {'Stem': [batch_size, 32, 32, 96],
                        'Cell_0': [batch_size, 32, 32, 192],
                        'Cell_1': [batch_size, 32, 32, 192],
                        'Cell_2': [batch_size, 32, 32, 192],
                        'Cell_3': [batch_size, 32, 32, 192],
                        'Cell_4': [batch_size, 32, 32, 192],
                        'Cell_5': [batch_size, 32, 32, 192],
                        'Cell_6': [batch_size, 16, 16, 384],
                        'Cell_7': [batch_size, 16, 16, 384],
                        'Cell_8': [batch_size, 16, 16, 384],
                        'Cell_9': [batch_size, 16, 16, 384],
                        'Cell_10': [batch_size, 16, 16, 384],
                        'Cell_11': [batch_size, 16, 16, 384],
                        'Cell_12': [batch_size, 8, 8, 768],
                        'Cell_13': [batch_size, 8, 8, 768],
                        'Cell_14': [batch_size, 8, 8, 768],
                        'Cell_15': [batch_size, 8, 8, 768],
                        'Cell_16': [batch_size, 8, 8, 768],
                        'Cell_17': [batch_size, 8, 8, 768],
                        'Reduction_Cell_0': [batch_size, 16, 16, 256],
                        'Reduction_Cell_1': [batch_size, 8, 8, 512],
                        'global_pool': [batch_size, 768],
                        # Logits and predictions
                        'AuxLogits': [batch_size, num_classes],
                        'Logits': [batch_size, num_classes],
                        'Predictions': [batch_size, num_classes]}
    # Exact key-set match guards against endpoints appearing or disappearing.
    self.assertCountEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      tf.logging.info('Endpoint name: {}'.format(endpoint_name))
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertIn(endpoint_name, end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testAllEndPointsShapesMobileModel(self):
    """Every endpoint of the Mobile net has the expected static shape."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
      _, end_points = nasnet.build_nasnet_mobile(inputs, num_classes)
    endpoints_shapes = {'Stem': [batch_size, 28, 28, 88],
                        'Cell_0': [batch_size, 28, 28, 264],
                        'Cell_1': [batch_size, 28, 28, 264],
                        'Cell_2': [batch_size, 28, 28, 264],
                        'Cell_3': [batch_size, 28, 28, 264],
                        'Cell_4': [batch_size, 14, 14, 528],
                        'Cell_5': [batch_size, 14, 14, 528],
                        'Cell_6': [batch_size, 14, 14, 528],
                        'Cell_7': [batch_size, 14, 14, 528],
                        'Cell_8': [batch_size, 7, 7, 1056],
                        'Cell_9': [batch_size, 7, 7, 1056],
                        'Cell_10': [batch_size, 7, 7, 1056],
                        'Cell_11': [batch_size, 7, 7, 1056],
                        'Reduction_Cell_0': [batch_size, 14, 14, 352],
                        'Reduction_Cell_1': [batch_size, 7, 7, 704],
                        'global_pool': [batch_size, 1056],
                        # Logits and predictions
                        'AuxLogits': [batch_size, num_classes],
                        'Logits': [batch_size, num_classes],
                        'Predictions': [batch_size, num_classes]}
    self.assertCountEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      tf.logging.info('Endpoint name: {}'.format(endpoint_name))
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertIn(endpoint_name, end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testAllEndPointsShapesLargeModel(self):
    """Every endpoint of the Large net has the expected static shape."""
    batch_size = 5
    height, width = 331, 331
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
      _, end_points = nasnet.build_nasnet_large(inputs, num_classes)
    endpoints_shapes = {'Stem': [batch_size, 42, 42, 336],
                        'Cell_0': [batch_size, 42, 42, 1008],
                        'Cell_1': [batch_size, 42, 42, 1008],
                        'Cell_2': [batch_size, 42, 42, 1008],
                        'Cell_3': [batch_size, 42, 42, 1008],
                        'Cell_4': [batch_size, 42, 42, 1008],
                        'Cell_5': [batch_size, 42, 42, 1008],
                        'Cell_6': [batch_size, 21, 21, 2016],
                        'Cell_7': [batch_size, 21, 21, 2016],
                        'Cell_8': [batch_size, 21, 21, 2016],
                        'Cell_9': [batch_size, 21, 21, 2016],
                        'Cell_10': [batch_size, 21, 21, 2016],
                        'Cell_11': [batch_size, 21, 21, 2016],
                        'Cell_12': [batch_size, 11, 11, 4032],
                        'Cell_13': [batch_size, 11, 11, 4032],
                        'Cell_14': [batch_size, 11, 11, 4032],
                        'Cell_15': [batch_size, 11, 11, 4032],
                        'Cell_16': [batch_size, 11, 11, 4032],
                        'Cell_17': [batch_size, 11, 11, 4032],
                        'Reduction_Cell_0': [batch_size, 21, 21, 1344],
                        'Reduction_Cell_1': [batch_size, 11, 11, 2688],
                        'global_pool': [batch_size, 4032],
                        # Logits and predictions
                        'AuxLogits': [batch_size, num_classes],
                        'Logits': [batch_size, num_classes],
                        'Predictions': [batch_size, num_classes]}
    self.assertCountEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      tf.logging.info('Endpoint name: {}'.format(endpoint_name))
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertIn(endpoint_name, end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testVariablesSetDeviceMobileModel(self):
    """Variables land on the device active when the graph is built."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    tf.train.create_global_step()
    # Force all Variables to reside on the device.
    with tf.variable_scope('on_cpu'), tf.device('/cpu:0'):
      with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
        nasnet.build_nasnet_mobile(inputs, num_classes)
    with tf.variable_scope('on_gpu'), tf.device('/gpu:0'):
      with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
        nasnet.build_nasnet_mobile(inputs, num_classes)
    for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'):
      self.assertDeviceEqual(v.device, '/cpu:0')
    for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'):
      self.assertDeviceEqual(v.device, '/gpu:0')

  def testUnknownBatchSizeMobileModel(self):
    """A placeholder batch dimension of None still produces valid logits."""
    batch_size = 1
    height, width = 224, 224
    num_classes = 1000
    with self.test_session() as sess:
      inputs = tf.placeholder(tf.float32, (None, height, width, 3))
      with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
        logits, _ = nasnet.build_nasnet_mobile(inputs, num_classes)
      # Static shape keeps the unknown batch dimension.
      self.assertListEqual(logits.get_shape().as_list(),
                           [None, num_classes])
      images = tf.random_uniform((batch_size, height, width, 3))
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEqual(output.shape, (batch_size, num_classes))

  def testEvaluationMobileModel(self):
    """Inference (is_training=False) runs and yields one class per image."""
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    with self.test_session() as sess:
      eval_inputs = tf.random_uniform((batch_size, height, width, 3))
      with slim.arg_scope(nasnet.nasnet_mobile_arg_scope()):
        logits, _ = nasnet.build_nasnet_mobile(eval_inputs,
                                               num_classes,
                                               is_training=False)
      predictions = tf.argmax(logits, 1)
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEqual(output.shape, (batch_size,))
if __name__ == '__main__':
  # These tests use tf.compat.v1 APIs (placeholders, variable scopes,
  # sessions), so v2 eager behavior must be disabled to run them under
  # TensorFlow 2.x.
  tf.disable_v2_behavior()
  tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD300 Model Configuration.
References:
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
SSD: Single Shot MultiBox Detector
arXiv:1512.02325
Ported from MLPerf reference implementation:
https://github.com/mlperf/reference/tree/ssd/single_stage_detector/ssd
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import os
import re
import threading
import tensorflow.compat.v1 as tf
# pylint: disable=g-direct-tensorflow-import
import constants
import mlperf
import ssd_constants
from cnn_util import log_fn
from models import model as model_lib
from models import resnet_model
from tensorflow.contrib import layers as contrib_layers
from tensorflow.python.ops import variables
BACKBONE_MODEL_SCOPE_NAME = 'resnet34_backbone'
class SSD300Model(model_lib.CNNModel):
"""Single Shot Multibox Detection (SSD) model for 300x300 image datasets."""
  def __init__(self, label_num=ssd_constants.NUM_CLASSES, batch_size=32,
               learning_rate=1e-3, backbone='resnet34', params=None):
    """Initializes the SSD300 model configuration.

    Args:
      label_num: number of class labels including background
        (ssd_constants.NUM_CLASSES by default).
      batch_size: per-device batch size.
      learning_rate: base learning rate, defined at `base_lr_batch_size`.
      backbone: backbone network name; only 'resnet34' is supported.
      params: benchmark parameters forwarded to the base CNNModel.

    Raises:
      ValueError: if `backbone` is not 'resnet34'.
    """
    super(SSD300Model, self).__init__('ssd300', 300, batch_size, learning_rate,
                                      params=params)
    # For COCO dataset, 80 categories + 1 background = 81 labels
    self.label_num = label_num
    # Currently only support ResNet-34 as backbone model
    if backbone != 'resnet34':
      raise ValueError('Invalid backbone model %s for SSD.' % backbone)
    mlperf.logger.log(key=mlperf.tags.BACKBONE, value=backbone)
    # Number of channels and default boxes associated with the following layers:
    # ResNet34 layer, Conv7, Conv8_2, Conv9_2, Conv10_2, Conv11_2
    self.out_chan = [256, 512, 512, 256, 256, 256]
    mlperf.logger.log(key=mlperf.tags.LOC_CONF_OUT_CHANNELS,
                      value=self.out_chan)
    # Number of default boxes from layers of different scales
    # 38x38x4, 19x19x6, 10x10x6, 5x5x6, 3x3x4, 1x1x4
    self.num_dboxes = [4, 6, 6, 6, 4, 4]
    mlperf.logger.log(key=mlperf.tags.NUM_DEFAULTS_PER_CELL,
                      value=self.num_dboxes)
    # TODO(haoyuzhang): in order to correctly restore in replicated mode, need
    # to create a saver for each tower before graph is finalized. Use variable
    # manager for better efficiency.
    self.backbone_savers = []
    # Collected predictions for eval stage. It maps each image id in eval
    # dataset to a dict containing the following information:
    # source_id: raw ID of image
    # raw_shape: raw shape of image
    # pred_box: encoded box coordinates of prediction
    # pred_scores: scores of classes in prediction
    self.predictions = {}
    # Global step when predictions are collected.
    self.eval_global_step = 0
    # Average precision. In asynchronous eval mode, this is the latest AP we
    # get so far and may not be the results at current eval step.
    self.eval_coco_ap = 0
    # Process, queues, and thread for asynchronous evaluation. When enabled,
    # create a separate process (async_eval_process) that continuously pull
    # intermediate results from the predictions queue (a multiprocessing queue),
    # process them, and push final results into results queue (another
    # multiprocessing queue). The main thread is responsible to push messages
    # into predictions queue, and start a separate thread to continuously pull
    # messages from results queue to update final results.
    # Message in predictions queue should be a tuple of two elements:
    # (evaluation step, predictions)
    # Message in results queue should be a tuple of two elements:
    # (evaluation step, final results)
    self.async_eval_process = None
    self.async_eval_predictions_queue = None
    self.async_eval_results_queue = None
    self.async_eval_results_getter_thread = None
    # The MLPerf reference uses a starting lr of 1e-3 at bs=32.
    self.base_lr_batch_size = 32
  def skip_final_affine_layer(self):
    """Tells the benchmark harness not to append a final affine layer.

    SSD packs its own location/confidence heads into the output in
    add_inference, so the standard classification head must be skipped.
    """
    return True
def gpu_preprocess_nhwc(self, images, phase_train=True):
try:
import ssd_dataloader # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation ; To evaluate using COCO'
'metric, download and install Python COCO API from'
'https://github.com/cocodataset/cocoapi')
if phase_train:
images = ssd_dataloader.color_jitter(
images, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05)
images = ssd_dataloader.normalize_image(images)
return images
def add_backbone_model(self, cnn):
# --------------------------------------------------------------------------
# Resnet-34 backbone model -- modified for SSD
# --------------------------------------------------------------------------
# Input 300x300, output 150x150
cnn.conv(64, 7, 7, 2, 2, mode='SAME_RESNET', use_batch_norm=True)
cnn.mpool(3, 3, 2, 2, mode='SAME')
resnet34_layers = [3, 4, 6, 3]
version = 'v1'
# ResNet-34 block group 1
# Input 150x150, output 75x75
for i in range(resnet34_layers[0]):
# Last argument forces residual_block to use projection shortcut, even
# though the numbers of input and output channels are equal
resnet_model.residual_block(cnn, 64, 1, version)
# ResNet-34 block group 2
# Input 75x75, output 38x38
for i in range(resnet34_layers[1]):
stride = 2 if i == 0 else 1
resnet_model.residual_block(cnn, 128, stride, version, i == 0)
# ResNet-34 block group 3
# This block group is modified: first layer uses stride=1 so that the image
# size does not change in group of layers
# Input 38x38, output 38x38
for i in range(resnet34_layers[2]):
# The following line is intentionally commented out to differentiate from
# the original ResNet-34 model
# stride = 2 if i == 0 else 1
resnet_model.residual_block(cnn, 256, stride, version, i == 0)
# ResNet-34 block group 4: removed final block group
# The following 3 lines are intentionally commented out to differentiate
# from the original ResNet-34 model
# for i in range(resnet34_layers[3]):
# stride = 2 if i == 0 else 1
# resnet_model.residual_block(cnn, 512, stride, version, i == 0)
  def add_inference(self, cnn):
    """Builds the full SSD graph on `cnn`.

    Constructs the ResNet-34 backbone, the extra SSD feature layers
    (Conv7..Conv11), and per-feature-map location/confidence convolution
    heads, then packs all head outputs into a single
    [batch_size, NUM_SSD_BOXES, 4 + label_num] tensor assigned to
    cnn.top_layer.

    Args:
      cnn: convnet builder holding the input images in cnn.top_layer.

    Returns:
      The packed logits tensor (also stored as cnn.top_layer).
    """
    cnn.use_batch_norm = True
    cnn.batch_norm_config = {'decay': ssd_constants.BATCH_NORM_DECAY,
                             'epsilon': ssd_constants.BATCH_NORM_EPSILON,
                             'scale': True}
    with tf.variable_scope(BACKBONE_MODEL_SCOPE_NAME):
      self.add_backbone_model(cnn)

    # --------------------------------------------------------------------------
    # SSD additional layers
    # --------------------------------------------------------------------------

    def add_ssd_layer(cnn, depth, k_size, stride, mode):
      # Plain (no batch-norm) conv with Xavier init, used for all extra layers.
      return cnn.conv(
          depth,
          k_size,
          k_size,
          stride,
          stride,
          mode=mode,
          use_batch_norm=False,
          kernel_initializer=contrib_layers.xavier_initializer())

    # Activations for feature maps of different layers
    self.activations = [cnn.top_layer]
    # Conv7_1, Conv7_2
    # Input 38x38, output 19x19
    add_ssd_layer(cnn, 256, 1, 1, 'valid')
    self.activations.append(add_ssd_layer(cnn, 512, 3, 2, 'same'))
    # Conv8_1, Conv8_2
    # Input 19x19, output 10x10
    add_ssd_layer(cnn, 256, 1, 1, 'valid')
    self.activations.append(add_ssd_layer(cnn, 512, 3, 2, 'same'))
    # Conv9_1, Conv9_2
    # Input 10x10, output 5x5
    add_ssd_layer(cnn, 128, 1, 1, 'valid')
    self.activations.append(add_ssd_layer(cnn, 256, 3, 2, 'same'))
    # Conv10_1, Conv10_2
    # Input 5x5, output 3x3
    add_ssd_layer(cnn, 128, 1, 1, 'valid')
    self.activations.append(add_ssd_layer(cnn, 256, 3, 1, 'valid'))
    # Conv11_1, Conv11_2
    # Input 3x3, output 1x1
    add_ssd_layer(cnn, 128, 1, 1, 'valid')
    self.activations.append(add_ssd_layer(cnn, 256, 3, 1, 'valid'))

    # One location head and one confidence head per feature map; nd is the
    # number of default boxes per cell, oc the feature map's channel count.
    self.loc = []
    self.conf = []
    for nd, ac, oc in zip(self.num_dboxes, self.activations, self.out_chan):
      l = cnn.conv(
          nd * 4,
          3,
          3,
          1,
          1,
          input_layer=ac,
          num_channels_in=oc,
          activation=None,
          use_batch_norm=False,
          kernel_initializer=contrib_layers.xavier_initializer())
      scale = l.get_shape()[-1]
      # shape = [batch_size, nd * 4, scale, scale]
      l = tf.reshape(l, [self.batch_size, nd, 4, scale, scale])
      # shape = [batch_size, nd, 4, scale, scale]
      l = tf.transpose(l, [0, 1, 3, 4, 2])
      # shape = [batch_size, nd, scale, scale, 4]
      self.loc.append(tf.reshape(l, [self.batch_size, -1, 4]))
      # shape = [batch_size, nd * scale * scale, 4]
      c = cnn.conv(
          nd * self.label_num,
          3,
          3,
          1,
          1,
          input_layer=ac,
          num_channels_in=oc,
          activation=None,
          use_batch_norm=False,
          kernel_initializer=contrib_layers.xavier_initializer())
      # shape = [batch_size, nd * label_num, scale, scale]
      c = tf.reshape(c, [self.batch_size, nd, self.label_num, scale, scale])
      # shape = [batch_size, nd, label_num, scale, scale]
      c = tf.transpose(c, [0, 1, 3, 4, 2])
      # shape = [batch_size, nd, scale, scale, label_num]
      self.conf.append(tf.reshape(c, [self.batch_size, -1, self.label_num]))
      # shape = [batch_size, nd * scale * scale, label_num]

    # Shape of locs: [batch_size, NUM_SSD_BOXES, 4]
    # Shape of confs: [batch_size, NUM_SSD_BOXES, label_num]
    locs, confs = tf.concat(self.loc, 1), tf.concat(self.conf, 1)

    # Pack location and confidence outputs into a single output layer
    # Shape of logits: [batch_size, NUM_SSD_BOXES, 4+label_num]
    logits = tf.concat([locs, confs], 2)

    cnn.top_layer = logits
    cnn.top_size = 4 + self.label_num

    return cnn.top_layer
def get_learning_rate(self, global_step, batch_size):
rescaled_lr = self.get_scaled_base_learning_rate(batch_size)
# Defined in MLPerf reference model
boundaries = [160000, 200000]
boundaries = [b * self.base_lr_batch_size // batch_size for b in boundaries]
decays = [1, 0.1, 0.01]
learning_rates = [rescaled_lr * d for d in decays]
lr = tf.train.piecewise_constant(global_step, boundaries, learning_rates)
warmup_steps = int(118287 / batch_size * 5)
warmup_lr = (
rescaled_lr * tf.cast(global_step, tf.float32) / tf.cast(
warmup_steps, tf.float32))
return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
def get_scaled_base_learning_rate(self, batch_size):
"""Calculates base learning rate for creating lr schedule.
In replicated mode, gradients are summed rather than averaged which, with
the sgd and momentum optimizers, increases the effective learning rate by
lr * num_gpus. Dividing the base lr by num_gpus negates the increase.
Args:
batch_size: Total batch-size.
Returns:
Base learning rate to use to create lr schedule.
"""
base_lr = self.learning_rate
if self.params.variable_update == 'replicated':
base_lr = self.learning_rate / self.params.num_gpus
scaled_lr = base_lr * (batch_size / self.base_lr_batch_size)
return scaled_lr
def _collect_backbone_vars(self):
backbone_vars = tf.get_collection(
tf.GraphKeys.GLOBAL_VARIABLES, scope='.*'+ BACKBONE_MODEL_SCOPE_NAME)
var_list = {}
# Assume variables in the checkpoint are following the naming convention of
# a model checkpoint trained with TF official model
# TODO(haoyuzhang): the following variable name parsing is hacky and easy
# to break if there is change in naming convention of either benchmarks or
# official models.
for v in backbone_vars:
# conv2d variable example (model <-- checkpoint):
# v/cg/conv24/conv2d/kernel:0 <-- conv2d_24/kernel
if 'conv2d' in v.name:
re_match = re.search(r'conv(\d+)/conv2d/(.+):', v.name)
if re_match:
layer_id = int(re_match.group(1))
param_name = re_match.group(2)
vname_in_ckpt = self._var_name_in_official_model_ckpt(
'conv2d', layer_id, param_name)
var_list[vname_in_ckpt] = v
# batchnorm varariable example:
# v/cg/conv24/batchnorm25/gamma:0 <-- batch_normalization_25/gamma
elif 'batchnorm' in v.name:
re_match = re.search(r'batchnorm(\d+)/(.+):', v.name)
if re_match:
layer_id = int(re_match.group(1))
param_name = re_match.group(2)
vname_in_ckpt = self._var_name_in_official_model_ckpt(
'batch_normalization', layer_id, param_name)
var_list[vname_in_ckpt] = v
return var_list
def _var_name_in_official_model_ckpt(self, layer_name, layer_id, param_name):
"""Return variable names according to convention in TF official models."""
vname_in_ckpt = layer_name
if layer_id > 0:
vname_in_ckpt += '_' + str(layer_id)
vname_in_ckpt += '/' + param_name
return vname_in_ckpt
def loss_function(self, inputs, build_network_result):
logits = build_network_result.logits
# Unpack model output back to locations and confidence scores of predictions
# Shape of pred_loc: [batch_size, NUM_SSD_BOXES, 4]
# Shape of pred_label: [batch_size, NUM_SSD_BOXES, label_num]
pred_loc, pred_label = tf.split(logits, [4, self.label_num], 2)
# Shape of gt_loc: [batch_size, NUM_SSD_BOXES, 4]
# Shape of gt_label: [batch_size, NUM_SSD_BOXES, 1]
# Shape of num_gt: [batch_size]
_, gt_loc, gt_label, num_gt = inputs
gt_label = tf.cast(gt_label, tf.int32)
box_loss = self._localization_loss(pred_loc, gt_loc, gt_label, num_gt)
class_loss = self._classification_loss(pred_label, gt_label, num_gt)
tf.summary.scalar('box_loss', tf.reduce_mean(box_loss))
tf.summary.scalar('class_loss', tf.reduce_mean(class_loss))
return class_loss + box_loss
def _localization_loss(self, pred_loc, gt_loc, gt_label, num_matched_boxes):
"""Computes the localization loss.
Computes the localization loss using smooth l1 loss.
Args:
pred_loc: a flatten tensor that includes all predicted locations. The
shape is [batch_size, num_anchors, 4].
gt_loc: a tensor representing box regression targets in
[batch_size, num_anchors, 4].
gt_label: a tensor that represents the classification groundtruth targets.
The shape is [batch_size, num_anchors, 1].
num_matched_boxes: the number of anchors that are matched to a groundtruth
targets, used as the loss normalizater. The shape is [batch_size].
Returns:
box_loss: a float32 representing total box regression loss.
"""
mask = tf.greater(tf.squeeze(gt_label), 0)
float_mask = tf.cast(mask, tf.float32)
smooth_l1 = tf.reduce_sum(tf.losses.huber_loss(
gt_loc, pred_loc,
reduction=tf.losses.Reduction.NONE
), axis=2)
smooth_l1 = tf.multiply(smooth_l1, float_mask)
box_loss = tf.reduce_sum(smooth_l1, axis=1)
return tf.reduce_mean(box_loss / num_matched_boxes)
  def _classification_loss(self, pred_label, gt_label, num_matched_boxes):
    """Computes the classification loss.

    Computes the classification loss with hard negative mining.

    Args:
      pred_label: a flatten tensor that includes all predicted class. The shape
        is [batch_size, num_anchors, num_classes].
      gt_label: a tensor that represents the classification groundtruth targets.
        The shape is [batch_size, num_anchors, 1].
      num_matched_boxes: the number of anchors that are matched to a groundtruth
        targets. This is used as the loss normalizater.

    Returns:
      box_loss: a float32 representing total box regression loss.
    """
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        gt_label, pred_label, reduction=tf.losses.Reduction.NONE)
    # Anchors matched to a groundtruth box (label > 0) are the positives.
    mask = tf.greater(tf.squeeze(gt_label), 0)
    float_mask = tf.cast(mask, tf.float32)
    # Hard example mining
    # Zero out positives so that only negative (background) anchors are ranked.
    neg_masked_cross_entropy = cross_entropy * (1 - float_mask)
    # argsort(argsort(x, DESCENDING)) yields each anchor's rank by loss
    # (0 = highest loss), so `relative_position` orders negatives by hardness.
    relative_position = tf.argsort(
        tf.argsort(
            neg_masked_cross_entropy, direction='DESCENDING'))
    # Keep at most NEGS_PER_POSITIVE negatives per matched box, capped at the
    # total number of SSD anchors.
    num_neg_boxes = tf.minimum(
        tf.to_int32(num_matched_boxes) * ssd_constants.NEGS_PER_POSITIVE,
        ssd_constants.NUM_SSD_BOXES)
    top_k_neg_mask = tf.cast(tf.less(
        relative_position,
        tf.tile(num_neg_boxes[:, tf.newaxis], (1, ssd_constants.NUM_SSD_BOXES))
    ), tf.float32)
    # Sum loss over positives plus the selected hard negatives, normalized by
    # the number of matched boxes per image.
    class_loss = tf.reduce_sum(
        tf.multiply(cross_entropy, float_mask + top_k_neg_mask), axis=1)
    return tf.reduce_mean(class_loss / num_matched_boxes)
def add_backbone_saver(self):
# Create saver with mapping from variable names in checkpoint of backbone
# model to variables in SSD model
backbone_var_list = self._collect_backbone_vars()
self.backbone_savers.append(tf.train.Saver(backbone_var_list))
def load_backbone_model(self, sess, backbone_model_path):
for saver in self.backbone_savers:
saver.restore(sess, backbone_model_path)
def get_input_data_types(self, subset):
if subset == 'validation':
return [self.data_type, tf.float32, tf.float32, tf.float32, tf.int32]
return [self.data_type, tf.float32, tf.float32, tf.float32]
def get_input_shapes(self, subset):
"""Return encoded tensor shapes for train and eval data respectively."""
if subset == 'validation':
# Validation data shapes:
# 1. images
# 2. ground truth locations of boxes
# 3. ground truth classes of objects in boxes
# 4. source image IDs
# 5. raw image shapes
return [
[self.batch_size, self.image_size, self.image_size, self.depth],
[self.batch_size, ssd_constants.MAX_NUM_EVAL_BOXES, 4],
[self.batch_size, ssd_constants.MAX_NUM_EVAL_BOXES, 1],
[self.batch_size],
[self.batch_size, 3],
]
# Training data shapes:
# 1. images
# 2. ground truth locations of boxes
# 3. ground truth classes of objects in boxes
# 4. numbers of objects in images
return [
[self.batch_size, self.image_size, self.image_size, self.depth],
[self.batch_size, ssd_constants.NUM_SSD_BOXES, 4],
[self.batch_size, ssd_constants.NUM_SSD_BOXES, 1],
[self.batch_size]
]
def accuracy_function(self, inputs, logits):
"""Returns the ops to measure the mean precision of the model."""
try:
import ssd_dataloader # pylint: disable=g-import-not-at-top
from object_detection.box_coders import faster_rcnn_box_coder # pylint: disable=g-import-not-at-top
from object_detection.core import box_coder # pylint: disable=g-import-not-at-top
from object_detection.core import box_list # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation ; To evaluate using COCO'
'metric, download and install Python COCO API from'
'https://github.com/cocodataset/cocoapi')
# Unpack model output back to locations and confidence scores of predictions
# pred_locs: relative locations (coordinates) of objects in all SSD boxes
# shape: [batch_size, NUM_SSD_BOXES, 4]
# pred_labels: confidence scores of objects being of all categories
# shape: [batch_size, NUM_SSD_BOXES, label_num]
pred_locs, pred_labels = tf.split(logits, [4, self.label_num], 2)
ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=ssd_constants.BOX_CODER_SCALES)
anchors = box_list.BoxList(
tf.convert_to_tensor(ssd_dataloader.DefaultBoxes()('ltrb')))
pred_boxes = box_coder.batch_decode(
encoded_boxes=pred_locs, box_coder=ssd_box_coder, anchors=anchors)
pred_scores = tf.nn.softmax(pred_labels, axis=2)
# TODO(haoyuzhang): maybe use `gt_boxes` and `gt_classes` for visualization.
_, gt_boxes, gt_classes, source_id, raw_shape = inputs # pylint: disable=unused-variable
return {
(constants.UNREDUCED_ACCURACY_OP_PREFIX +
ssd_constants.PRED_BOXES): pred_boxes,
(constants.UNREDUCED_ACCURACY_OP_PREFIX +
ssd_constants.PRED_SCORES): pred_scores,
# TODO(haoyuzhang): maybe use these values for visualization.
# constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_boxes': gt_boxes,
# constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_classes': gt_classes,
(constants.UNREDUCED_ACCURACY_OP_PREFIX +
ssd_constants.SOURCE_ID): source_id,
(constants.UNREDUCED_ACCURACY_OP_PREFIX +
ssd_constants.RAW_SHAPE): raw_shape
}
def postprocess(self, results):
"""Postprocess results returned from model."""
try:
import coco_metric # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation ; To evaluate using COCO'
'metric, download and install Python COCO API from'
'https://github.com/cocodataset/cocoapi')
pred_boxes = results[ssd_constants.PRED_BOXES]
pred_scores = results[ssd_constants.PRED_SCORES]
# TODO(haoyuzhang): maybe use these values for visualization.
# gt_boxes = results['gt_boxes']
# gt_classes = results['gt_classes']
source_id = results[ssd_constants.SOURCE_ID]
raw_shape = results[ssd_constants.RAW_SHAPE]
# COCO evaluation requires processing COCO_NUM_VAL_IMAGES exactly once. Due
# to rounding errors (i.e., COCO_NUM_VAL_IMAGES % batch_size != 0), setting
# `num_eval_epochs` to 1 is not enough and will often miss some images. We
# expect user to set `num_eval_epochs` to >1, which will leave some unused
# images from previous steps in `predictions`. Here we check if we are doing
# eval at a new global step.
if results['global_step'] > self.eval_global_step:
self.eval_global_step = results['global_step']
self.predictions.clear()
for i, sid in enumerate(source_id):
self.predictions[int(sid)] = {
ssd_constants.PRED_BOXES: pred_boxes[i],
ssd_constants.PRED_SCORES: pred_scores[i],
ssd_constants.SOURCE_ID: source_id[i],
ssd_constants.RAW_SHAPE: raw_shape[i]
}
# COCO metric calculates mAP only after a full epoch of evaluation. Return
# dummy results for top_N_accuracy to be compatible with benchmar_cnn.py.
if len(self.predictions) >= ssd_constants.COCO_NUM_VAL_IMAGES:
log_fn('Got results for all {:d} eval examples. Calculate mAP...'.format(
ssd_constants.COCO_NUM_VAL_IMAGES))
annotation_file = os.path.join(self.params.data_dir,
ssd_constants.ANNOTATION_FILE)
# Size of predictions before decoding about 15--30GB, while size after
# decoding is 100--200MB. When using async eval mode, decoding takes
# 20--30 seconds of main thread time but is necessary to avoid OOM during
# inter-process communication.
decoded_preds = coco_metric.decode_predictions(self.predictions.values())
self.predictions.clear()
if self.params.collect_eval_results_async:
def _eval_results_getter():
"""Iteratively get eval results from async eval process."""
while True:
step, eval_results = self.async_eval_results_queue.get()
self.eval_coco_ap = eval_results['COCO/AP']
mlperf.logger.log_eval_accuracy(
self.eval_coco_ap, step, self.batch_size * self.params.num_gpus,
ssd_constants.COCO_NUM_TRAIN_IMAGES)
if self.reached_target():
# Reached target, clear all pending messages in predictions queue
# and insert poison pill to stop the async eval process.
while not self.async_eval_predictions_queue.empty():
self.async_eval_predictions_queue.get()
self.async_eval_predictions_queue.put('STOP')
break
if not self.async_eval_process:
# Limiting the number of messages in predictions queue to prevent OOM.
# Each message (predictions data) can potentially consume a lot of
# memory, and normally there should only be few messages in the queue.
# If often blocked on this, consider reducing eval frequency.
self.async_eval_predictions_queue = multiprocessing.Queue(2)
self.async_eval_results_queue = multiprocessing.Queue()
# Reason to use a Process as opposed to Thread is mainly the
# computationally intensive eval runner. Python multithreading is not
# truly running in parallel, a runner thread would get significantly
# delayed (or alternatively delay the main thread).
self.async_eval_process = multiprocessing.Process(
target=coco_metric.async_eval_runner,
args=(self.async_eval_predictions_queue,
self.async_eval_results_queue,
annotation_file))
self.async_eval_process.daemon = True
self.async_eval_process.start()
self.async_eval_results_getter_thread = threading.Thread(
target=_eval_results_getter, args=())
self.async_eval_results_getter_thread.daemon = True
self.async_eval_results_getter_thread.start()
self.async_eval_predictions_queue.put(
(self.eval_global_step, decoded_preds))
return {'top_1_accuracy': 0, 'top_5_accuracy': 0.}
eval_results = coco_metric.compute_map(decoded_preds, annotation_file)
self.eval_coco_ap = eval_results['COCO/AP']
ret = {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.}
for metric_key, metric_value in eval_results.items():
ret[constants.SIMPLE_VALUE_RESULT_PREFIX + metric_key] = metric_value
mlperf.logger.log_eval_accuracy(self.eval_coco_ap, self.eval_global_step,
self.batch_size * self.params.num_gpus,
ssd_constants.COCO_NUM_TRAIN_IMAGES)
return ret
log_fn('Got {:d} out of {:d} eval examples.'
' Waiting for the remaining to calculate mAP...'.format(
len(self.predictions), ssd_constants.COCO_NUM_VAL_IMAGES))
return {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.}
  def get_synthetic_inputs(self, input_name, nclass):
    """Generating synthetic data matching real data shape and type.

    Args:
      input_name: name given to the variable holding the synthetic images.
      nclass: unused here; label tensors below use fixed SSD shapes instead.

    Returns:
      Tuple (inputs, boxes, classes, nboxes) mirroring the real input tuple.
    """
    # Sample a batch of random images with the training input shape/dtype.
    inputs = tf.random_uniform(
        self.get_input_shapes('train')[0], dtype=self.data_type)
    # Store the batch in a non-trainable local variable so the same synthetic
    # data is reused every step instead of being re-sampled.
    inputs = variables.VariableV1(inputs, trainable=False,
                                  collections=[tf.GraphKeys.LOCAL_VARIABLES],
                                  name=input_name)
    boxes = tf.random_uniform(
        [self.batch_size, ssd_constants.NUM_SSD_BOXES, 4], dtype=tf.float32)
    classes = tf.random_uniform(
        [self.batch_size, ssd_constants.NUM_SSD_BOXES, 1], dtype=tf.float32)
    # NOTE(review): nboxes is a per-image box count yet is sampled as float32;
    # presumably downstream casts or tolerates floats -- confirm against the
    # real input pipeline.
    nboxes = tf.random_uniform(
        [self.batch_size], minval=1, maxval=10, dtype=tf.float32)
    return (inputs, boxes, classes, nboxes)
def reached_target(self):
return (self.params.stop_at_top_1_accuracy and
self.eval_coco_ap >= self.params.stop_at_top_1_accuracy)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Trivial model configuration."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
from models import model
class TrivialModel(model.CNNModel):
  """Trivial model configuration: flatten, 1-unit bottleneck, 4096 affine."""

  def __init__(self, params=None):
    # Image size is 227 (= 224 + 3), matching the reshape in add_inference.
    super(TrivialModel, self).__init__(
        'trivial', 224 + 3, 32, 0.005, params=params)

  def add_inference(self, cnn):
    # Flatten the 227x227x3 image, squeeze through a single unit, then expand.
    flat_size = 227 * 227 * 3
    cnn.reshape([-1, flat_size])
    cnn.affine(1)
    cnn.affine(4096)
class TrivialCifar10Model(model.CNNModel):
  """Trivial cifar10 model: flatten, 1-unit bottleneck, 4096 affine."""

  def __init__(self, params=None):
    super(TrivialCifar10Model, self).__init__(
        'trivial', 32, 32, 0.005, params=params)

  def add_inference(self, cnn):
    # Flatten the 32x32x3 image, squeeze through a single unit, then expand.
    flat_size = 32 * 32 * 3
    cnn.reshape([-1, flat_size])
    cnn.affine(1)
    cnn.affine(4096)
class TrivialSSD300Model(model.CNNModel):
  """Trivial SSD300 model: consumes SSD-shaped inputs with a trivial net."""

  def __init__(self, params=None):
    super(TrivialSSD300Model, self).__init__(
        'trivial', 300, params.batch_size, 0.005, params=params)

  def add_inference(self, cnn):
    # Flatten the 300x300x3 image, squeeze through a single unit, then expand.
    flat_size = 300 * 300 * 3
    cnn.reshape([-1, flat_size])
    cnn.affine(1)
    cnn.affine(4096)

  def get_input_shapes(self, subset):
    """Shapes for the (images, boxes, classes, nboxes) input tuple."""
    batch = self.batch_size
    # 8732 matches the per-image SSD box count used by the real SSD model.
    return [[batch, 300, 300, 3],
            [batch, 8732, 4],
            [batch, 8732, 1],
            [batch]]

  def loss_function(self, inputs, build_network_result):
    """Computes loss on (images, labels), discarding the SSD box inputs."""
    images, _, _, labels = inputs
    labels = tf.cast(labels, tf.int32)
    return super(TrivialSSD300Model, self).loss_function(
        (images, labels), build_network_result)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Vgg model configuration.
Includes multiple models: vgg11, vgg16, vgg19, corresponding to
model A, D, and E in Table 1 of [1].
References:
[1] Simonyan, Karen, Andrew Zisserman
Very Deep Convolutional Networks for Large-Scale Image Recognition
arXiv:1409.1556 (2014)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange # pylint: disable=redefined-builtin
from models import model
def _construct_vgg(cnn, num_conv_layers):
"""Build vgg architecture from blocks."""
assert len(num_conv_layers) == 5
for _ in xrange(num_conv_layers[0]):
cnn.conv(64, 3, 3)
cnn.mpool(2, 2)
for _ in xrange(num_conv_layers[1]):
cnn.conv(128, 3, 3)
cnn.mpool(2, 2)
for _ in xrange(num_conv_layers[2]):
cnn.conv(256, 3, 3)
cnn.mpool(2, 2)
for _ in xrange(num_conv_layers[3]):
cnn.conv(512, 3, 3)
cnn.mpool(2, 2)
for _ in xrange(num_conv_layers[4]):
cnn.conv(512, 3, 3)
cnn.mpool(2, 2)
cnn.reshape([-1, 512 * 7 * 7])
cnn.affine(4096)
cnn.dropout()
cnn.affine(4096)
cnn.dropout()
class Vgg11Model(model.CNNModel):
  """Vgg11 configuration (model A in the VGG paper)."""

  def __init__(self, params=None):
    super(Vgg11Model, self).__init__('vgg11', 224, 64, 0.005, params=params)

  def add_inference(self, cnn):
    # One conv per block in the first two blocks, two in the remaining three.
    layers_per_block = [1, 1, 2, 2, 2]
    _construct_vgg(cnn, layers_per_block)
class Vgg16Model(model.CNNModel):
  """Vgg16 configuration (model D in the VGG paper)."""

  def __init__(self, params=None):
    super(Vgg16Model, self).__init__('vgg16', 224, 64, 0.005, params=params)

  def add_inference(self, cnn):
    # Two convs in the first two blocks, three in the remaining three.
    layers_per_block = [2, 2, 3, 3, 3]
    _construct_vgg(cnn, layers_per_block)
class Vgg19Model(model.CNNModel):
  """Vgg19 configuration (model E in the VGG paper)."""

  def __init__(self, params=None):
    super(Vgg19Model, self).__init__('vgg19', 224, 64, 0.005, params=params)

  def add_inference(self, cnn):
    # Two convs in the first two blocks, four in the remaining three.
    layers_per_block = [2, 2, 4, 4, 4]
    _construct_vgg(cnn, layers_per_block)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility code for the default platform."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import tempfile
import cnn_util
from models import model_config
# Root of the benchmark project: the directory that contains cnn_util.py.
_ROOT_PROJECT_DIR = os.path.dirname(cnn_util.__file__)
def define_platform_params():
  """Defines platform-specific parameters.

  The default platform has no extra parameters to define, so this is a no-op
  kept only to satisfy the platform interface.
  """
def get_cluster_manager(params, config_proto):
  """Returns the cluster manager to be used.

  The default platform always coordinates distributed jobs over gRPC.
  """
  manager = cnn_util.GrpcClusterManager(params, config_proto)
  return manager
def get_command_to_run_python_module(module):
  """Returns a command to run a Python module.

  Args:
    module: name of a module that lives in the project root, without '.py'.

  Returns:
    An argv-style list that invokes the current interpreter on the module file.

  Raises:
    ValueError: if the running Python interpreter cannot be located.
  """
  interpreter = sys.executable
  if not interpreter:
    raise ValueError('Could not find Python interpreter')
  module_path = os.path.join(_ROOT_PROJECT_DIR, module + '.py')
  return [interpreter, module_path]
def get_test_output_dir():
  """Returns a fresh directory where test outputs should be placed.

  The base directory is taken from the TEST_OUTPUTS_DIR environment variable
  (default /tmp/tf_cnn_benchmarks_test_outputs); each call returns a unique
  subdirectory inside it.
  """
  base_dir = os.environ.get('TEST_OUTPUTS_DIR',
                            '/tmp/tf_cnn_benchmarks_test_outputs')
  # makedirs with exist_ok avoids the check-then-create race of the previous
  # os.path.exists/os.mkdir pair, and also works when TEST_OUTPUTS_DIR points
  # at a nested path whose parents do not exist yet.
  os.makedirs(base_dir, exist_ok=True)
  return tempfile.mkdtemp(dir=base_dir)
def get_test_data_dir():
  """Returns the path to the test_data directory in the project root."""
  return os.path.join(_ROOT_PROJECT_DIR, 'test_data')
def get_ssd_backborn_model_file():
  """Unsupported on the default platform."""
  # NOTE(review): 'backborn' looks like a typo for 'backbone', but the name is
  # part of the platform API so it is kept as-is.
  raise NotImplementedError
def get_ssd_backboard_data_dir():
  """Unsupported on the default platform."""
  # NOTE(review): 'backboard' likely means 'backbone'; kept for API
  # compatibility.
  raise NotImplementedError
def _initialize(params, config_proto):
  """Performs default-platform initialization: registers the TF1 models."""
  # The default platform needs neither params nor config_proto.
  del params, config_proto
  model_config.register_tf1_models()
# Module-level guard so initialization runs at most once per process.
# (The historical spelling 'initalized' is kept to avoid breaking references.)
_is_initalized = False


def initialize(params, config_proto):
  """Initializes the platform; calls after the first are no-ops."""
  global _is_initalized
  if not _is_initalized:
    # Set the flag before running so a second call returns immediately.
    _is_initalized = True
    _initialize(params, config_proto)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility code for a certain platform.
This file simply imports everything from the default platform. To switch to a
different platform, the import statement can be changed to point to a new
platform.
Creating a custom platform can be useful to, e.g., run some initialization code
required by the platform or register a platform-specific model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from platforms.default.util import * # pylint: disable=unused-import,wildcard-import
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Image pre-processing utilities.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
# pylint: disable=g-direct-tensorflow-import
import cnn_util
from tensorflow.python.data.ops import multi_device_iterator_ops
from tensorflow.python.framework import function
from tensorflow.python.layers import utils
from tensorflow.python.ops import data_flow_ops
from tensorflow.python.platform import gfile
import mlperf
def parse_example_proto(example_serialized):
  """Parses an Example proto containing a training example of an image.

  The output of the build_image_data.py image preprocessing script is a dataset
  containing serialized Example protocol buffers. Each Example proto contains
  the following fields:

    image/height: 462
    image/width: 581
    image/colorspace: 'RGB'
    image/channels: 3
    image/class/label: 615
    image/class/synset: 'n03623198'
    image/class/text: 'knee pad'
    image/object/bbox/xmin: 0.1
    image/object/bbox/xmax: 0.9
    image/object/bbox/ymin: 0.2
    image/object/bbox/ymax: 0.6
    image/object/bbox/label: 615
    image/format: 'JPEG'
    image/filename: 'ILSVRC2012_val_00041207.JPEG'
    image/encoded: <JPEG encoded string>

  Args:
    example_serialized: scalar Tensor tf.string containing a serialized
      Example protocol buffer.

  Returns:
    image_buffer: Tensor tf.string containing the contents of a JPEG file.
    label: Tensor tf.int32 containing the label.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    text: Tensor tf.string containing the human-readable label.
  """
  # Dense features that occur exactly once per example.
  feature_map = {
      'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
                                          default_value=''),
      'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
                                              default_value=-1),
      'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
                                             default_value=''),
  }
  # Bounding-box coordinates are variable-length float lists (sparse).
  for coord in ('xmin', 'ymin', 'xmax', 'ymax'):
    feature_map['image/object/bbox/' + coord] = tf.VarLenFeature(
        dtype=tf.float32)

  features = tf.parse_single_example(example_serialized, feature_map)
  label = tf.cast(features['image/class/label'], dtype=tf.int32)

  # Note the deliberate (y, x) coordinate ordering: [ymin, xmin, ymax, xmax].
  coords = [
      tf.expand_dims(features['image/object/bbox/' + c].values, 0)
      for c in ('ymin', 'xmin', 'ymax', 'xmax')
  ]
  bbox = tf.concat(coords, 0)
  # Force the variable number of bounding boxes into the shape
  # [1, num_boxes, coords].
  bbox = tf.expand_dims(bbox, 0)
  bbox = tf.transpose(bbox, [0, 2, 1])

  return features['image/encoded'], label, bbox, features['image/class/text']
# Maps resize-method names to tf.image.ResizeMethod values; 'round_robin' is
# handled separately in get_image_resize_method below.
_RESIZE_METHOD_MAP = {
    'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR,
    'bilinear': tf.image.ResizeMethod.BILINEAR,
    'bicubic': tf.image.ResizeMethod.BICUBIC,
    'area': tf.image.ResizeMethod.AREA
}
def get_image_resize_method(resize_method, batch_position=0):
  """Get tensorflow resize method.

  If resize_method is 'round_robin', return different methods based on batch
  position in a round-robin fashion. NOTE: If the batch size is not a multiple
  of the number of methods, then the distribution of methods will not be
  uniform.

  Args:
    resize_method: (string) nearest, bilinear, bicubic, area, or round_robin.
    batch_position: position of the image in a batch. NOTE: this argument can
      be an integer or a tensor

  Returns:
    one of resize type defined in tf.image.ResizeMethod.
  """
  if resize_method != 'round_robin':
    return _RESIZE_METHOD_MAP[resize_method]

  # return a resize method based on batch position in a round-robin fashion.
  resize_methods = list(_RESIZE_METHOD_MAP.values())
  def lookup(index):
    return resize_methods[index]

  # The chain of conds below selects
  # resize_methods[batch_position % len(resize_methods)]; smart_cond lets it
  # work whether batch_position is a Python int or a tensor.
  def resize_method_0():
    return utils.smart_cond(batch_position % len(resize_methods) == 0,
                            lambda: lookup(0), resize_method_1)

  def resize_method_1():
    return utils.smart_cond(batch_position % len(resize_methods) == 1,
                            lambda: lookup(1), resize_method_2)

  def resize_method_2():
    return utils.smart_cond(batch_position % len(resize_methods) == 2,
                            lambda: lookup(2), lambda: lookup(3))

  # NOTE(jsimsa): Unfortunately, we cannot use a single recursive function here
  # because TF would not be able to construct a finite graph.

  return resize_method_0()
def decode_jpeg(image_buffer, scope=None):
  """Decode a JPEG string into one 3-D image Tensor.

  Args:
    image_buffer: scalar string Tensor.
    scope: Optional scope for op_scope.
  Returns:
    3-D uint8 Tensor of shape [height, width, 3]; height and width are set
    dynamically at runtime by the decode op.
  """
  with tf.name_scope(scope or 'decode_jpeg'):
    # fancy_upscaling=False and the INTEGER_FAST dct method trade a little
    # decode fidelity for speed.
    return tf.image.decode_jpeg(image_buffer, channels=3,
                                fancy_upscaling=False,
                                dct_method='INTEGER_FAST')
# Per-channel means in RGB order -- presumably the standard ImageNet
# training-set channel means used for mean subtraction; confirm against the
# preprocessing code that consumes _CHANNEL_MEANS.
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
_CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN]
def normalized_image(images):
  """Maps images from the [0, 255] range to the [-1, 1] range."""
  # Dividing by 127.5 lands in [0, 2] ...
  scaled = tf.multiply(images, 1. / 127.5)
  mlperf.logger.log(key=mlperf.tags.INPUT_MEAN_SUBTRACTION, value=[1.0] * 3)
  # ... then shifting down by 1 yields [-1, 1].
  return tf.subtract(scaled, 1.0)
def eval_image(image,
               height,
               width,
               batch_position,
               resize_method,
               summary_verbosity=0):
  """Get the image for model evaluation.

  We preprocess the image similarly to Slim, see
  https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/vgg_preprocessing.py
  Validation images do not have bounding boxes, so to crop the image, we first
  resize the image such that the aspect ratio is maintained and the resized
  height and width are both at least 1.145 times `height` and `width`
  respectively. Then, we do a central crop to size (`height`, `width`).

  Args:
    image: 3-D float Tensor representing the image.
    height: The height of the image that will be returned.
    width: The width of the image that will be returned.
    batch_position: position of the image in a batch, which affects how images
      are distorted and resized. NOTE: this argument can be an integer or a
      tensor
    resize_method: one of the strings 'round_robin', 'nearest', 'bilinear',
      'bicubic', or 'area'.
    summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both
      summaries and checkpoints.
  Returns:
    An image of size (output_height, output_width, 3) that is resized and
    cropped as described above.
  """
  # TODO(reedwm): Currently we resize then crop. Investigate if it's faster to
  # crop then resize.
  with tf.name_scope('eval_image'):
    if summary_verbosity >= 3:
      tf.summary.image(
          'original_image', tf.expand_dims(image, 0))

    # Image height/width are only known at graph run time.
    shape = tf.shape(image)
    image_height = shape[0]
    image_width = shape[1]
    image_height_float = tf.cast(image_height, tf.float32)
    image_width_float = tf.cast(image_width, tf.float32)

    # This value is chosen so that in resnet, images are cropped to a size of
    # 256 x 256, which matches what other implementations do. The final image
    # size for resnet is 224 x 224, and floor(224 * 1.145) = 256.
    scale_factor = 1.145

    # Compute resize_height and resize_width to be the minimum values such that
    #   1. The aspect ratio is maintained (i.e. resize_height / resize_width is
    #      image_height / image_width), and
    #   2. resize_height >= height * `scale_factor`, and
    #   3. resize_width >= width * `scale_factor`
    max_ratio = tf.maximum(height / image_height_float,
                           width / image_width_float)
    resize_height = tf.cast(image_height_float * max_ratio * scale_factor,
                            tf.int32)
    resize_width = tf.cast(image_width_float * max_ratio * scale_factor,
                           tf.int32)
    mlperf.logger.log_input_resize_aspect_preserving(height, width,
                                                     scale_factor)

    # Resize the image to shape (`resize_height`, `resize_width`)
    image_resize_method = get_image_resize_method(resize_method, batch_position)
    distorted_image = tf.image.resize_images(image,
                                             [resize_height, resize_width],
                                             image_resize_method,
                                             align_corners=False)

    # Do a central crop of the image to size (height, width).
    # MLPerf requires us to log (height, width) with two different keys.
    mlperf.logger.log(key=mlperf.tags.INPUT_CENTRAL_CROP, value=[height, width])
    mlperf.logger.log(key=mlperf.tags.INPUT_RESIZE, value=[height, width])
    total_crop_height = (resize_height - height)
    crop_top = total_crop_height // 2
    total_crop_width = (resize_width - width)
    crop_left = total_crop_width // 2
    distorted_image = tf.slice(distorted_image, [crop_top, crop_left, 0],
                               [height, width, 3])
    # tf.slice with tensor offsets loses the static shape; restore it.
    distorted_image.set_shape([height, width, 3])
    if summary_verbosity >= 3:
      tf.summary.image(
          'cropped_resized_image', tf.expand_dims(distorted_image, 0))
    image = distorted_image
  return image
def train_image(image_buffer,
                height,
                width,
                bbox,
                batch_position,
                resize_method,
                distortions,
                scope=None,
                summary_verbosity=0,
                distort_color_in_yiq=False,
                fuse_decode_and_crop=False):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not affect the label.

  Args:
    image_buffer: scalar string Tensor representing the raw JPEG image buffer.
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax].
    batch_position: position of the image in a batch, which affects how images
      are distorted and resized. NOTE: this argument can be an integer or a
      tensor
    resize_method: round_robin, nearest, bilinear, bicubic, or area.
    distortions: If true, apply full distortions for image colors.
    scope: Optional scope for op_scope.
    summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both
      summaries and checkpoints.
    distort_color_in_yiq: distort color of input images in YIQ space.
    fuse_decode_and_crop: fuse the decode/crop operation.
  Returns:
    3-D float Tensor of distorted image used for training.
  """
  # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'):
  # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
  with tf.name_scope(scope or 'distort_image'):
    # A large fraction of image datasets contain a human-annotated bounding box
    # delineating the region of the image containing the object of interest. We
    # choose to create a new bounding box for the object which is a randomly
    # distorted version of the human-annotated bounding box that obeys an
    # allowed range of aspect ratios, sizes and overlap with the human-annotated
    # bounding box. If no box is supplied, then we assume the bounding box is
    # the entire image.
    min_object_covered = 0.1
    aspect_ratio_range = [0.75, 1.33]
    area_range = [0.05, 1.0]
    max_attempts = 100
    mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV,
                      value=min_object_covered)
    mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE,
                      value=aspect_ratio_range)
    mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE,
                      value=area_range)
    mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS,
                      value=max_attempts)

    # Sample a crop window from the (still-encoded) JPEG's shape, so decoding
    # can be limited to the cropped region when fuse_decode_and_crop is set.
    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
        tf.image.extract_jpeg_shape(image_buffer),
        bounding_boxes=bbox,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=True)
    bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
    if summary_verbosity >= 3:
      image = tf.image.decode_jpeg(image_buffer, channels=3,
                                   dct_method='INTEGER_FAST')
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
      image_with_distorted_box = tf.image.draw_bounding_boxes(
          tf.expand_dims(image, 0), distort_bbox)
      tf.summary.image(
          'images_with_distorted_bounding_box',
          image_with_distorted_box)

    # Crop the image to the specified bounding box.
    if fuse_decode_and_crop:
      offset_y, offset_x, _ = tf.unstack(bbox_begin)
      target_height, target_width, _ = tf.unstack(bbox_size)
      crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
      image = tf.image.decode_and_crop_jpeg(
          image_buffer, crop_window, channels=3)
    else:
      image = tf.image.decode_jpeg(image_buffer, channels=3,
                                   dct_method='INTEGER_FAST')
      image = tf.slice(image, bbox_begin, bbox_size)

    mlperf.logger.log(key=mlperf.tags.INPUT_RANDOM_FLIP)
    distorted_image = tf.image.random_flip_left_right(image)

    # This resizing operation may distort the images because the aspect
    # ratio is not respected.
    mlperf.logger.log(key=mlperf.tags.INPUT_RESIZE, value=[height, width])
    image_resize_method = get_image_resize_method(resize_method, batch_position)
    distorted_image = tf.image.resize_images(
        distorted_image, [height, width],
        image_resize_method,
        align_corners=False)
    # Restore the shape since the dynamic slice based upon the bbox_size loses
    # the third dimension.
    distorted_image.set_shape([height, width, 3])
    if summary_verbosity >= 3:
      tf.summary.image('cropped_resized_maybe_flipped_image',
                       tf.expand_dims(distorted_image, 0))

    if distortions:
      distorted_image = tf.cast(distorted_image, dtype=tf.float32)
      # Images values are expected to be in [0,1] for color distortion.
      distorted_image /= 255.
      # Randomly distort the colors.
      distorted_image = distort_color(distorted_image, batch_position,
                                      distort_color_in_yiq=distort_color_in_yiq)

      # Note: This ensures the scaling matches the output of eval_image
      distorted_image *= 255

    if summary_verbosity >= 3:
      tf.summary.image(
          'final_distorted_image',
          tf.expand_dims(distorted_image, 0))
    return distorted_image
def distort_color(image, batch_position=0, distort_color_in_yiq=False,
                  scope=None):
  """Distort the color of the image.

  Each color distortion is non-commutative and thus ordering of the color ops
  matters. Ideally we would randomly permute the ordering of the color ops.
  Rather then adding that level of complication, we select a distinct ordering
  of color ops based on the position of the image in a batch.

  Args:
    image: float32 Tensor containing single image. Tensor values should be in
      range [0, 1].
    batch_position: the position of the image in a batch. NOTE: this argument
      can be an integer or a tensor
    distort_color_in_yiq: distort color of input images in YIQ space.
    scope: Optional scope for op_scope.
  Returns:
    color-distorted image
  """
  if distort_color_in_yiq:
    # tf.contrib only exists in TF1; raise an actionable error under TF2.
    try:
      from tensorflow.contrib.image.python.ops import distort_image_ops  # pylint: disable=g-import-not-at-top
    except ImportError:
      raise ValueError(
          'In TF2, you cannot pass --distortions unless you also pass '
          '--nodistort_color_in_yiq. This is because the random_hsv_in_yiq was '
          'removed in TF2. --distortions does not improve accuracy on resnet '
          'so it is not recommended. --nodistort_color_in_yiq also has no '
          'impact on accuracy, but may hurt performance.')
  with tf.name_scope(scope or 'distort_color'):
    # `image=image` binds the current value as a default argument, avoiding
    # the Python late-binding-closure pitfall.
    def distort_fn_0(image=image):
      """Variant 0 of distort function."""
      image = tf.image.random_brightness(image, max_delta=32. / 255.)
      if distort_color_in_yiq:
        image = distort_image_ops.random_hsv_in_yiq(
            image, lower_saturation=0.5, upper_saturation=1.5,
            max_delta_hue=0.2 * math.pi)
      else:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
      image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
      return image

    def distort_fn_1(image=image):
      """Variant 1 of distort function."""
      image = tf.image.random_brightness(image, max_delta=32. / 255.)
      image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
      if distort_color_in_yiq:
        image = distort_image_ops.random_hsv_in_yiq(
            image, lower_saturation=0.5, upper_saturation=1.5,
            max_delta_hue=0.2 * math.pi)
      else:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
      return image

    # Alternate the op ordering by batch position (see docstring).
    image = utils.smart_cond(batch_position % 2 == 0, distort_fn_0,
                             distort_fn_1)
    # The random_* ops do not necessarily clamp.
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image
class InputPreprocessor(object):
"""Base class for all model preprocessors."""
  def __init__(self, batch_size, output_shapes):
    # Global batch size, and the shapes of the tensors this preprocessor
    # produces.
    self.batch_size = batch_size
    self.output_shapes = output_shapes
  def supports_datasets(self):
    """Whether this preprocessor supports dataset.

    The dataset-based methods further down this class are only supported/used
    when this returns True; subclasses that implement them override this.
    """
    return False
  def minibatch(self, dataset, subset, params, shift_ratio=-1):
    """Returns tensors representing a minibatch of all the input.

    Args:
      dataset: the data source to draw examples from.
      subset: which split to read (e.g. 'train' or 'validation').
      params: benchmark params object.
      shift_ratio: presumably controls how input partitioning is shifted
        across workers, with -1 meaning "use the default" -- TODO confirm
        against a concrete subclass.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError('Must be implemented by subclass.')
# The methods added below are only supported/used if supports_datasets()
# returns True.
# TODO(laigd): refactor benchmark_cnn.py and put the logic of
# _build_input_processing() into InputPreprocessor.
  def parse_and_preprocess(self, value, batch_position):
    """Function to parse and preprocess an Example proto in input pipeline.

    Args:
      value: a serialized Example proto (as produced by the input dataset).
      batch_position: position of the example within its batch.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError('Must be implemented by subclass.')
# TODO(laigd): figure out how to remove these parameters, since the
# preprocessor itself has self.batch_size, self.num_splits, etc defined.
def build_multi_device_iterator(self, batch_size, num_splits, cpu_device,
params, gpu_devices, dataset, doing_eval):
"""Creates a MultiDeviceIterator."""
assert self.supports_datasets()
assert num_splits == len(gpu_devices)
with tf.name_scope('batch_processing'):
if doing_eval:
subset = 'validation'
else:
subset = 'train'
batch_size_per_split = batch_size // num_splits
ds = self.create_dataset(
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train=(not doing_eval),
datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
num_threads=params.datasets_num_private_threads,
datasets_use_caching=params.datasets_use_caching,
datasets_parallel_interleave_cycle_length=(
params.datasets_parallel_interleave_cycle_length),
datasets_sloppy_parallel_interleave=(
params.datasets_sloppy_parallel_interleave),
datasets_parallel_interleave_prefetch=(
params.datasets_parallel_interleave_prefetch))
multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
ds,
gpu_devices,
source_device=cpu_device,
max_buffer_size=params.multi_device_iterator_max_buffer_size)
tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS,
multi_device_iterator.initializer)
return multi_device_iterator
def create_dataset(self,
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train,
datasets_repeat_cached_sample,
num_threads=None,
datasets_use_caching=False,
datasets_parallel_interleave_cycle_length=None,
datasets_sloppy_parallel_interleave=False,
datasets_parallel_interleave_prefetch=None):
"""Creates a dataset for the benchmark."""
raise NotImplementedError('Must be implemented by subclass.')
def create_iterator(self, ds):
ds_iterator = tf.data.make_initializable_iterator(ds)
tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS,
ds_iterator.initializer)
return ds_iterator
def minibatch_fn(self, batch_size, model_input_shapes, num_splits,
dataset, subset, train, datasets_repeat_cached_sample,
num_threads, datasets_use_caching,
datasets_parallel_interleave_cycle_length,
datasets_sloppy_parallel_interleave,
datasets_parallel_interleave_prefetch):
"""Returns a function and list of args for the fn to create a minibatch."""
assert self.supports_datasets()
batch_size_per_split = batch_size // num_splits
assert batch_size_per_split == model_input_shapes[0][0]
with tf.name_scope('batch_processing'):
ds = self.create_dataset(batch_size, num_splits, batch_size_per_split,
dataset, subset, train,
datasets_repeat_cached_sample, num_threads,
datasets_use_caching,
datasets_parallel_interleave_cycle_length,
datasets_sloppy_parallel_interleave,
datasets_parallel_interleave_prefetch)
ds_iterator = self.create_iterator(ds)
ds_iterator_string_handle = ds_iterator.string_handle()
@function.Defun(tf.string)
def _fn(h):
remote_iterator = tf.data.Iterator.from_string_handle(
h, ds_iterator.output_types, ds_iterator.output_shapes)
input_list = remote_iterator.get_next()
reshaped_input_list = [
tf.reshape(input_list[i], shape=model_input_shapes[i])
for i in range(len(input_list))
]
return reshaped_input_list
return _fn, [ds_iterator_string_handle]
class BaseImagePreprocessor(InputPreprocessor):
  """Base class for all image model preprocessors.

  Derives per-image height/width/depth from the first output shape and
  implements a shared tf.data pipeline (create_dataset). Subclasses supply
  preprocess() to decode and augment a single image.
  """
  def __init__(self,
               batch_size,
               output_shapes,
               num_splits,
               dtype,
               train,
               distortions,
               resize_method,
               shift_ratio=-1,
               summary_verbosity=0,
               distort_color_in_yiq=True,
               fuse_decode_and_crop=True,
               match_mlperf=False):
    super(BaseImagePreprocessor, self).__init__(batch_size, output_shapes)
    image_shape = output_shapes[0]
    # image_shape is in form (batch_size, height, width, depth)
    self.height = image_shape[1]
    self.width = image_shape[2]
    self.depth = image_shape[3]
    self.num_splits = num_splits
    self.dtype = dtype
    self.train = train
    self.resize_method = resize_method
    self.shift_ratio = shift_ratio
    self.distortions = distortions
    self.distort_color_in_yiq = distort_color_in_yiq
    self.fuse_decode_and_crop = fuse_decode_and_crop
    # Every split (device) must receive the same number of images.
    if self.batch_size % self.num_splits != 0:
      raise ValueError(
          ('batch_size must be a multiple of num_splits: '
           'batch_size %d, num_splits: %d') %
          (self.batch_size, self.num_splits))
    self.batch_size_per_split = self.batch_size // self.num_splits
    self.summary_verbosity = summary_verbosity
    self.match_mlperf = match_mlperf
  def parse_and_preprocess(self, value, batch_position):
    """Parses an Example proto and preprocesses its encoded image.

    Args:
      value: serialized Example proto with image, label and bbox features.
      batch_position: position of the image within the batch, forwarded to
        preprocess() so augmentation can vary by position.

    Returns:
      Tuple of (preprocessed image tensor, label index tensor).
    """
    assert self.supports_datasets()
    image_buffer, label_index, bbox, _ = parse_example_proto(value)
    if self.match_mlperf:
      # MLPerf compliance mode does not use ground-truth bboxes for cropping,
      # so replace them with an empty bbox tensor and log the choice.
      bbox = tf.zeros((1, 0, 4), dtype=bbox.dtype)
      mlperf.logger.log(key=mlperf.tags.INPUT_CROP_USES_BBOXES, value=False)
    else:
      mlperf.logger.log(key=mlperf.tags.INPUT_CROP_USES_BBOXES, value=True)
    image = self.preprocess(image_buffer, bbox, batch_position)
    return (image, label_index)
  def preprocess(self, image_buffer, bbox, batch_position):
    """Decodes and preprocesses one image; must be overridden by subclass."""
    raise NotImplementedError('Must be implemented by subclass.')
  def create_dataset(self,
                     batch_size,
                     num_splits,
                     batch_size_per_split,
                     dataset,
                     subset,
                     train,
                     datasets_repeat_cached_sample,
                     num_threads=None,
                     datasets_use_caching=False,
                     datasets_parallel_interleave_cycle_length=None,
                     datasets_sloppy_parallel_interleave=False,
                     datasets_parallel_interleave_prefetch=None):
    """Creates a dataset for the benchmark."""
    assert self.supports_datasets()
    glob_pattern = dataset.tf_record_pattern(subset)
    file_names = gfile.Glob(glob_pattern)
    if not file_names:
      raise ValueError('Found no files in --data_dir matching: {}'
                       .format(glob_pattern))
    ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=train)
    # Read many TFRecord files concurrently; sloppy interleave trades
    # deterministic ordering for throughput.
    ds = ds.apply(
        tf.data.experimental.parallel_interleave(
            tf.data.TFRecordDataset,
            cycle_length=datasets_parallel_interleave_cycle_length or 10,
            sloppy=datasets_sloppy_parallel_interleave,
            prefetch_input_elements=datasets_parallel_interleave_prefetch))
    if datasets_repeat_cached_sample:
      # Repeat a single sample element indefinitely to emulate memory-speed IO.
      ds = ds.take(1).cache().repeat()
    # Zip each record with a cycling counter used as its batch position.
    counter = tf.data.Dataset.range(batch_size)
    counter = counter.repeat()
    ds = tf.data.Dataset.zip((ds, counter))
    ds = ds.prefetch(buffer_size=batch_size)
    if datasets_use_caching:
      ds = ds.cache()
    if train:
      buffer_size = 10000
      mlperf.logger.log(key=mlperf.tags.INPUT_SHARD, value=buffer_size)
      ds = ds.apply(
          tf.data.experimental.shuffle_and_repeat(buffer_size=buffer_size))
    else:
      ds = ds.repeat()
    ds = ds.apply(
        tf.data.experimental.map_and_batch(
            map_func=self.parse_and_preprocess,
            batch_size=batch_size_per_split,
            num_parallel_batches=num_splits))
    ds = ds.prefetch(buffer_size=num_splits)
    if num_threads:
      # Give the input pipeline its own private threadpool.
      options = tf.data.Options()
      options.experimental_threading.private_threadpool_size = num_threads
      ds = ds.with_options(options)
    return ds
class RecordInputImagePreprocessor(BaseImagePreprocessor):
  """Preprocessor for images with RecordInput format."""
  def preprocess(self, image_buffer, bbox, batch_position):
    """Preprocessing image_buffer as a function of its batch position."""
    if self.train:
      image = train_image(image_buffer, self.height, self.width, bbox,
                          batch_position, self.resize_method, self.distortions,
                          None, summary_verbosity=self.summary_verbosity,
                          distort_color_in_yiq=self.distort_color_in_yiq,
                          fuse_decode_and_crop=self.fuse_decode_and_crop)
    else:
      image = tf.image.decode_jpeg(
          image_buffer, channels=3, dct_method='INTEGER_FAST')
      image = eval_image(image, self.height, self.width, batch_position,
                         self.resize_method,
                         summary_verbosity=self.summary_verbosity)
    # Note: image is now float32 [height,width,3] with range [0, 255]
    # image = tf.cast(image, tf.uint8) # HACK TESTING
    if self.match_mlperf:
      mlperf.logger.log(key=mlperf.tags.INPUT_MEAN_SUBTRACTION,
                        value=_CHANNEL_MEANS)
      # MLPerf mode: subtract per-channel means only, no rescaling.
      normalized = image - _CHANNEL_MEANS
    else:
      normalized = normalized_image(image)
    return tf.cast(normalized, self.dtype)
  def minibatch(self,
                dataset,
                subset,
                params,
                shift_ratio=-1):
    """Returns per-split (images, labels) lists for one minibatch.

    Uses the tf.data pipeline when params.use_datasets is set, otherwise the
    legacy RecordInput reader.
    """
    if shift_ratio < 0:
      # A negative value means "use the preprocessor's configured ratio".
      shift_ratio = self.shift_ratio
    with tf.name_scope('batch_processing'):
      # Build final results per split.
      images = [[] for _ in range(self.num_splits)]
      labels = [[] for _ in range(self.num_splits)]
      if params.use_datasets:
        ds = self.create_dataset(
            self.batch_size, self.num_splits, self.batch_size_per_split,
            dataset, subset, self.train,
            datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
            num_threads=params.datasets_num_private_threads,
            datasets_use_caching=params.datasets_use_caching,
            datasets_parallel_interleave_cycle_length=(
                params.datasets_parallel_interleave_cycle_length),
            datasets_sloppy_parallel_interleave=(
                params.datasets_sloppy_parallel_interleave),
            datasets_parallel_interleave_prefetch=(
                params.datasets_parallel_interleave_prefetch))
        ds_iterator = self.create_iterator(ds)
        for d in xrange(self.num_splits):
          images[d], labels[d] = ds_iterator.get_next()
        # TODO(laigd): consider removing the --use_datasets option, it should
        # always use datasets.
      else:
        # Legacy path: RecordInput yields a flat batch of serialized records,
        # which are split and preprocessed one image at a time in the graph.
        record_input = data_flow_ops.RecordInput(
            file_pattern=dataset.tf_record_pattern(subset),
            seed=301,
            parallelism=64,
            buffer_size=10000,
            batch_size=self.batch_size,
            shift_ratio=shift_ratio,
            name='record_input')
        records = record_input.get_yield_op()
        records = tf.split(records, self.batch_size, 0)
        records = [tf.reshape(record, []) for record in records]
        for idx in xrange(self.batch_size):
          value = records[idx]
          (image, label) = self.parse_and_preprocess(value, idx)
          # Images are assigned round-robin to splits.
          split_index = idx % self.num_splits
          labels[split_index].append(label)
          images[split_index].append(image)
      for split_index in xrange(self.num_splits):
        if not params.use_datasets:
          # Stack the per-image tensors built above into split batches.
          images[split_index] = tf.parallel_stack(images[split_index])
          labels[split_index] = tf.concat(labels[split_index], 0)
        images[split_index] = tf.reshape(
            images[split_index],
            shape=[self.batch_size_per_split, self.height, self.width,
                   self.depth])
        labels[split_index] = tf.reshape(labels[split_index],
                                         [self.batch_size_per_split])
      return images, labels
  def supports_datasets(self):
    """This preprocessor supports the tf.data input path."""
    return True
class ImagenetPreprocessor(RecordInputImagePreprocessor):
  """RecordInput preprocessor that defers to the official ResNet pipeline."""

  def preprocess(self, image_buffer, bbox, batch_position):
    """Decodes and preprocesses one image via the official ImageNet code."""
    # The tensorflow/models repo is an optional dependency; fail loudly with
    # a hint when it is missing from the PYTHONPATH.
    # pylint: disable=g-import-not-at-top
    try:
      from official.r1.resnet.imagenet_preprocessing import preprocess_image
    except ImportError:
      tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.')
      raise
    # Training and evaluation differ only in the is_training flag, so a
    # single call site suffices.
    image = preprocess_image(
        image_buffer, bbox, self.height, self.width, self.depth,
        is_training=bool(self.train))
    return tf.cast(image, self.dtype)
class Cifar10ImagePreprocessor(BaseImagePreprocessor):
  """Preprocessor for Cifar10 input images."""

  def _distort_image(self, image):
    """Distort one image for training a network.

    Adopted the standard data augmentation scheme that is widely used for
    this dataset: the images are first zero-padded with 4 pixels on each side,
    then randomly cropped to again produce distorted images; half of the images
    are then horizontally mirrored.

    Args:
      image: input image.
    Returns:
      distorted image.
    """
    # Pad by 4 pixels on every side, then crop back to (height, width) at a
    # random offset.
    image = tf.image.resize_image_with_crop_or_pad(
        image, self.height + 8, self.width + 8)
    distorted_image = tf.random_crop(image,
                                     [self.height, self.width, self.depth])
    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    if self.summary_verbosity >= 3:
      tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0))
    return distorted_image

  def _eval_image(self, image):
    """Get the image for model evaluation."""
    # Bug fix: resize_image_with_crop_or_pad takes (target_height,
    # target_width); the arguments were previously passed in (width, height)
    # order. Harmless for square CIFAR images, but wrong for any non-square
    # configuration.
    distorted_image = tf.image.resize_image_with_crop_or_pad(
        image, self.height, self.width)
    if self.summary_verbosity >= 3:
      tf.summary.image('cropped.image', tf.expand_dims(distorted_image, 0))
    return distorted_image

  def preprocess(self, raw_image):
    """Preprocessing raw image."""
    if self.summary_verbosity >= 3:
      tf.summary.image('raw.image', tf.expand_dims(raw_image, 0))
    if self.train and self.distortions:
      image = self._distort_image(raw_image)
    else:
      image = self._eval_image(raw_image)
    normalized = normalized_image(image)
    return tf.cast(normalized, self.dtype)

  def minibatch(self,
                dataset,
                subset,
                params,
                shift_ratio=-1):
    """Returns per-split (images, labels) read via queue runners.

    Args:
      dataset: dataset object providing read_data_files() plus
        depth/height/width attributes.
      subset: which data subset to read, e.g. 'train'.
      params: benchmark params (unused in this implementation).
      shift_ratio: unused; kept for interface compatibility.
    """
    # TODO(jsimsa): Implement datasets code path
    del shift_ratio, params
    with tf.name_scope('batch_processing'):
      all_images, all_labels = dataset.read_data_files(subset)
      all_images = tf.constant(all_images)
      all_labels = tf.constant(all_labels)
      input_image, input_label = tf.train.slice_input_producer(
          [all_images, all_labels])
      input_image = tf.cast(input_image, self.dtype)
      input_label = tf.cast(input_label, tf.int32)
      # Ensure that the random shuffling has good mixing properties.
      min_fraction_of_examples_in_queue = 0.4
      min_queue_examples = int(dataset.num_examples_per_epoch(subset) *
                               min_fraction_of_examples_in_queue)
      raw_images, raw_labels = tf.train.shuffle_batch(
          [input_image, input_label], batch_size=self.batch_size,
          capacity=min_queue_examples + 3 * self.batch_size,
          min_after_dequeue=min_queue_examples)
      images = [[] for _ in range(self.num_splits)]
      labels = [[] for _ in range(self.num_splits)]
      # Create a list of size batch_size, each containing one image of the
      # batch. Without the unstack call, raw_images[i] would still access the
      # same image via a strided_slice op, but would be slower.
      raw_images = tf.unstack(raw_images, axis=0)
      raw_labels = tf.unstack(raw_labels, axis=0)
      for i in xrange(self.batch_size):
        split_index = i % self.num_splits
        # The raw image read from data has the format [depth, height, width]
        # reshape to the format returned by minibatch.
        raw_image = tf.reshape(raw_images[i],
                               [dataset.depth, dataset.height, dataset.width])
        raw_image = tf.transpose(raw_image, [1, 2, 0])
        image = self.preprocess(raw_image)
        images[split_index].append(image)
        labels[split_index].append(raw_labels[i])
      for split_index in xrange(self.num_splits):
        images[split_index] = tf.parallel_stack(images[split_index])
        labels[split_index] = tf.parallel_stack(labels[split_index])
      return images, labels
class COCOPreprocessor(BaseImagePreprocessor):
  """Preprocessor for COCO dataset input images, boxes, and labels."""
  def minibatch(self,
                dataset,
                subset,
                params,
                shift_ratio=-1):
    """Returns a list of per-input, per-split tensor lists for one minibatch."""
    del shift_ratio  # Not used when using datasets instead of data_flow_ops
    with tf.name_scope('batch_processing'):
      ds = self.create_dataset(
          batch_size=self.batch_size,
          num_splits=self.num_splits,
          batch_size_per_split=self.batch_size_per_split,
          dataset=dataset,
          subset=subset,
          train=self.train,
          datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
          num_threads=params.datasets_num_private_threads,
          datasets_use_caching=params.datasets_use_caching,
          datasets_parallel_interleave_cycle_length=(
              params.datasets_parallel_interleave_cycle_length),
          datasets_sloppy_parallel_interleave=(
              params.datasets_sloppy_parallel_interleave),
          datasets_parallel_interleave_prefetch=(
              params.datasets_parallel_interleave_prefetch))
      ds_iterator = self.create_iterator(ds)
      # Training data: 4 tuple
      # Validation data: 5 tuple
      # See get_input_shapes in models/ssd_model.py for details.
      input_len = 4 if subset == 'train' else 5
      input_lists = [[None for _ in range(self.num_splits)]
                     for _ in range(input_len)]
      for d in xrange(self.num_splits):
        input_list = ds_iterator.get_next()
        for i in range(input_len):
          input_lists[i][d] = input_list[i]
      return input_lists
  def preprocess(self, data):
    """Decodes and augments one parsed COCO example.

    Args:
      data: dict produced by ssd_parse_example_proto with keys
        'image_buffer', 'groundtruth_boxes', 'groundtruth_classes',
        'source_id' and 'raw_shape'.

    Returns:
      Training: (image, encoded_boxes, encoded_classes, num_matched_boxes).
      Eval: (image, boxes, classes, source_id, raw_shape).
    """
    try:
      import ssd_dataloader  # pylint: disable=g-import-not-at-top
      import ssd_constants  # pylint: disable=g-import-not-at-top
      from object_detection.core import preprocessor  # pylint: disable=g-import-not-at-top
    except ImportError:
      raise ImportError('To use the COCO dataset, you must clone the '
                        'repo https://github.com/tensorflow/models and add '
                        'tensorflow/models and tensorflow/models/research to '
                        'the PYTHONPATH, and compile the protobufs by '
                        'following https://github.com/tensorflow/models/blob/'
                        'master/research/object_detection/g3doc/installation.md'
                        '#protobuf-compilation')
    image_buffer = data['image_buffer']
    boxes = data['groundtruth_boxes']
    classes = tf.reshape(data['groundtruth_classes'], [-1, 1])
    source_id = tf.string_to_number(data['source_id'])
    raw_shape = data['raw_shape']
    ssd_encoder = ssd_dataloader.Encoder()
    # Only 80 of the 90 COCO classes are used.
    class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP)
    classes = tf.gather(class_map, classes)
    classes = tf.cast(classes, dtype=tf.float32)
    if self.train:
      image, boxes, classes = ssd_dataloader.ssd_decode_and_crop(
          image_buffer, boxes, classes, raw_shape)
      # ssd_crop resizes and returns image of dtype float32 and does not change
      # its range (i.e., value in between 0--255). Divide by 255. converts it
      # to [0, 1] range. Not doing this before cropping to avoid dtype cast
      # (which incurs additional memory copy).
      image /= 255.
      image, boxes = preprocessor.random_horizontal_flip(
          image=image, boxes=boxes)
      # Random horizontal flip probability is 50%
      # See https://github.com/tensorflow/models/blob/master/research/object_detection/core/preprocessor.py  # pylint: disable=line-too-long
      mlperf.logger.log(key=mlperf.tags.RANDOM_FLIP_PROBABILITY, value=0.5)
      image = tf.cast(image, self.dtype)
      encoded_returns = ssd_encoder.encode_labels(boxes, classes)
      encoded_classes, encoded_boxes, num_matched_boxes = encoded_returns
      # Shape of image: [width, height, channel]
      # Shape of encoded_boxes: [NUM_SSD_BOXES, 4]
      # Shape of encoded_classes: [NUM_SSD_BOXES, 1]
      # Shape of num_matched_boxes: [1]
      return (image, encoded_boxes, encoded_classes, num_matched_boxes)
    else:
      image = tf.image.decode_jpeg(image_buffer)
      image = tf.image.resize_images(
          image, size=(ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE))
      # resize_image returns image of dtype float32 and does not change its
      # range. Divide by 255 to convert image to [0, 1] range.
      image /= 255.
      image = ssd_dataloader.normalize_image(image)
      image = tf.cast(image, self.dtype)
      def trim_and_pad(inp_tensor):
        """Limit the number of boxes, and pad if necessary."""
        inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES]
        num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(inp_tensor)[0]
        inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
        return tf.reshape(inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES,
                                       inp_tensor.get_shape()[1]])
      boxes, classes = trim_and_pad(boxes), trim_and_pad(classes)
      # Shape of boxes: [MAX_NUM_EVAL_BOXES, 4]
      # Shape of classes: [MAX_NUM_EVAL_BOXES, 1]
      # Shape of source_id: [] (scalar tensor)
      # Shape of raw_shape: [3]
      return (image, boxes, classes, source_id, raw_shape)
  def create_dataset(self,
                     batch_size,
                     num_splits,
                     batch_size_per_split,
                     dataset,
                     subset,
                     train,
                     datasets_repeat_cached_sample,
                     num_threads=None,
                     datasets_use_caching=False,
                     datasets_parallel_interleave_cycle_length=None,
                     datasets_sloppy_parallel_interleave=False,
                     datasets_parallel_interleave_prefetch=None):
    """Creates a dataset for the benchmark."""
    try:
      import ssd_dataloader  # pylint: disable=g-import-not-at-top
    except ImportError:
      raise ImportError('To use the COCO dataset, you must clone the '
                        'repo https://github.com/tensorflow/models and add '
                        'tensorflow/models and tensorflow/models/research to '
                        'the PYTHONPATH, and compile the protobufs by '
                        'following https://github.com/tensorflow/models/blob/'
                        'master/research/object_detection/g3doc/installation.md'
                        '#protobuf-compilation')
    assert self.supports_datasets()
    glob_pattern = dataset.tf_record_pattern(subset)
    ds = tf.data.TFRecordDataset.list_files(glob_pattern, shuffle=train)
    # TODO(haoyuzhang): Enable map+filter fusion after cl/218399112 in release
    # options = tf.data.Options()
    # options.experimental_optimization = tf.data.experimental.OptimizationOptions()  # pylint: disable=line-too-long
    # options.experimental_optimization.map_and_filter_fusion = True
    # ds = ds.with_options(options)
    ds = ds.apply(
        tf.data.experimental.parallel_interleave(
            tf.data.TFRecordDataset,
            cycle_length=datasets_parallel_interleave_cycle_length or 10,
            sloppy=datasets_sloppy_parallel_interleave))
    mlperf.logger.log(key=mlperf.tags.INPUT_ORDER)
    if datasets_repeat_cached_sample:
      # Repeat a single sample element indefinitely to emulate memory-speed IO.
      ds = ds.take(1).cache().repeat()
    ds = ds.prefetch(buffer_size=batch_size)
    if datasets_use_caching:
      ds = ds.cache()
    if train:
      ds = ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=10000))
      mlperf.logger.log(key=mlperf.tags.INPUT_SHARD, value=10000)
      mlperf.logger.log(key=mlperf.tags.INPUT_ORDER)
    else:
      ds = ds.repeat()
    ds = ds.map(ssd_dataloader.ssd_parse_example_proto, num_parallel_calls=64)
    # Drop examples without any ground-truth boxes.
    ds = ds.filter(
        lambda data: tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0))
    ds = ds.apply(
        tf.data.experimental.map_and_batch(
            map_func=self.preprocess,
            batch_size=batch_size_per_split,
            num_parallel_batches=num_splits,
            drop_remainder=train))
    ds = ds.prefetch(buffer_size=num_splits)
    if num_threads:
      # Give the input pipeline its own private threadpool.
      options = tf.data.Options()
      options.experimental_threading.private_threadpool_size = num_threads
      ds = ds.with_options(options)
    return ds
  def supports_datasets(self):
    """This preprocessor supports the tf.data input path."""
    return True
class TestImagePreprocessor(BaseImagePreprocessor):
  """Preprocessor used for testing.

  set_fake_data() sets which images and labels will be output by minibatch(),
  and must be called before minibatch(). This allows tests to easily specify
  a set of images to use for training, without having to create any files.

  Queue runners must be started for this preprocessor to work.
  """
  def __init__(self,
               batch_size,
               output_shapes,
               num_splits,
               dtype,
               train=None,
               distortions=None,
               resize_method=None,
               shift_ratio=0,
               summary_verbosity=0,
               distort_color_in_yiq=False,
               fuse_decode_and_crop=False,
               match_mlperf=False):
    super(TestImagePreprocessor, self).__init__(
        batch_size, output_shapes, num_splits, dtype, train, distortions,
        resize_method, shift_ratio, summary_verbosity=summary_verbosity,
        distort_color_in_yiq=distort_color_in_yiq,
        fuse_decode_and_crop=fuse_decode_and_crop, match_mlperf=match_mlperf)
    # When set, minibatch() asserts it is called with exactly this subset.
    self.expected_subset = None
  def set_fake_data(self, fake_images, fake_labels):
    """Sets the numpy image and label arrays that minibatch() will emit."""
    assert len(fake_images.shape) == 4
    assert len(fake_labels.shape) == 1
    num_images = fake_images.shape[0]
    assert num_images == fake_labels.shape[0]
    assert num_images % self.batch_size == 0
    self.fake_images = fake_images
    self.fake_labels = fake_labels
  def minibatch(self,
                dataset,
                subset,
                params,
                shift_ratio=0):
    """Get test image batches."""
    del dataset, params
    if (not hasattr(self, 'fake_images') or
        not hasattr(self, 'fake_labels')):
      raise ValueError('Must call set_fake_data() before calling minibatch '
                       'on TestImagePreprocessor')
    if self.expected_subset is not None:
      assert subset == self.expected_subset
    # Roll the fake data so each (virtual) worker sees a shifted batch order.
    shift_ratio = shift_ratio or self.shift_ratio
    fake_images = cnn_util.roll_numpy_batches(self.fake_images, self.batch_size,
                                              shift_ratio)
    fake_labels = cnn_util.roll_numpy_batches(self.fake_labels, self.batch_size,
                                              shift_ratio)
    with tf.name_scope('batch_processing'):
      image_slice, label_slice = tf.train.slice_input_producer(
          [fake_images, fake_labels],
          shuffle=False,
          name='image_slice')
      raw_images, raw_labels = tf.train.batch(
          [image_slice, label_slice], batch_size=self.batch_size,
          name='image_batch')
      images = [[] for _ in range(self.num_splits)]
      labels = [[] for _ in range(self.num_splits)]
      # Assign images round-robin to splits, casting to the model dtype.
      for i in xrange(self.batch_size):
        split_index = i % self.num_splits
        raw_image = tf.cast(raw_images[i], self.dtype)
        images[split_index].append(raw_image)
        labels[split_index].append(raw_labels[i])
      for split_index in xrange(self.num_splits):
        images[split_index] = tf.parallel_stack(images[split_index])
        labels[split_index] = tf.parallel_stack(labels[split_index])
      normalized = [normalized_image(part) for part in images]
      return [[tf.cast(part, self.dtype) for part in normalized], labels]
class LibrispeechPreprocessor(InputPreprocessor):
  """Preprocessor for librispeech class for all image model preprocessors."""
  def __init__(self, batch_size, output_shapes, num_splits, dtype, train,
               **kwargs):
    # Extra keyword arguments (image-specific options passed by callers that
    # treat all preprocessors uniformly) are accepted and ignored.
    del kwargs
    super(LibrispeechPreprocessor, self).__init__(batch_size, output_shapes)
    self.num_splits = num_splits
    self.dtype = dtype
    self.is_train = train
    # Every split (device) must receive the same number of examples.
    if self.batch_size % self.num_splits != 0:
      raise ValueError(('batch_size must be a multiple of num_splits: '
                        'batch_size %d, num_splits: %d') % (self.batch_size,
                                                            self.num_splits))
    self.batch_size_per_split = self.batch_size // self.num_splits
  def create_dataset(self,
                     batch_size,
                     num_splits,
                     batch_size_per_split,
                     dataset,
                     subset,
                     train,
                     datasets_repeat_cached_sample,
                     num_threads=None,
                     datasets_use_caching=False,
                     datasets_parallel_interleave_cycle_length=None,
                     datasets_sloppy_parallel_interleave=False,
                     datasets_parallel_interleave_prefetch=None):
    """Creates a dataset for the benchmark."""
    # TODO(laigd): currently the only difference between this and the one in
    # BaseImagePreprocessor is, this uses map() and padded_batch() while the
    # latter uses tf.data.experimental.map_and_batch(). Try to merge them.
    assert self.supports_datasets()
    glob_pattern = dataset.tf_record_pattern(subset)
    file_names = gfile.Glob(glob_pattern)
    if not file_names:
      raise ValueError('Found no files in --data_dir matching: {}'
                       .format(glob_pattern))
    ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=train)
    # Read many TFRecord files concurrently.
    ds = ds.apply(
        tf.data.experimental.parallel_interleave(
            tf.data.TFRecordDataset,
            cycle_length=datasets_parallel_interleave_cycle_length or 10,
            sloppy=datasets_sloppy_parallel_interleave,
            prefetch_input_elements=datasets_parallel_interleave_prefetch))
    if datasets_repeat_cached_sample:
      # Repeat a single sample element indefinitely to emulate memory-speed IO.
      ds = ds.take(1).cache().repeat()
    # Zip each record with a cycling counter used as its batch position.
    counter = tf.data.Dataset.range(batch_size)
    counter = counter.repeat()
    ds = tf.data.Dataset.zip((ds, counter))
    ds = ds.prefetch(buffer_size=batch_size)
    if datasets_use_caching:
      ds = ds.cache()
    if train:
      ds = ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=10000))
    else:
      ds = ds.repeat()
    ds = ds.map(map_func=self.parse_and_preprocess,
                num_parallel_calls=batch_size_per_split*num_splits)
    # Utterances vary in length, so pad every field up to its static
    # per-example output shape before batching.
    ds = ds.padded_batch(
        batch_size=batch_size_per_split,
        padded_shapes=tuple([
            tf.TensorShape(output_shape[1:])
            for output_shape in self.output_shapes
        ]),
        drop_remainder=True)
    ds = ds.prefetch(buffer_size=num_splits)
    if num_threads:
      # Give the input pipeline its own private threadpool.
      options = tf.data.Options()
      options.experimental_threading.private_threadpool_size = num_threads
      ds = ds.with_options(options)
    return ds
  def minibatch(self, dataset, subset, params, shift_ratio=-1):
    """Returns a 4-tuple of per-split input lists (see comment below)."""
    assert params.use_datasets
    # TODO(laigd): unify this with CNNModel's minibatch()
    # TODO(laigd): in distributed mode we use shift_ratio so different workers
    # won't work on same inputs, so we should respect that.
    del shift_ratio
    with tf.name_scope('batch_processing'):
      ds = self.create_dataset(
          self.batch_size,
          self.num_splits,
          self.batch_size_per_split,
          dataset,
          subset,
          self.is_train,
          datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
          num_threads=params.datasets_num_private_threads,
          datasets_use_caching=params.datasets_use_caching,
          datasets_parallel_interleave_cycle_length=(
              params.datasets_parallel_interleave_cycle_length),
          datasets_sloppy_parallel_interleave=(
              params.datasets_sloppy_parallel_interleave),
          datasets_parallel_interleave_prefetch=(
              params.datasets_parallel_interleave_prefetch))
      ds_iterator = self.create_iterator(ds)
      # The four lists are: input spectrogram feature, labels, input lengths,
      # label lengths
      input_lists = [[None for _ in range(self.num_splits)] for _ in range(4)]
      for d in xrange(self.num_splits):
        input_list = ds_iterator.get_next()
        for i in range(4):
          input_lists[i][d] = input_list[i]
      # Sanity-check that the pipeline's static shapes match what the model
      # declared via output_shapes.
      assert self.output_shapes == [
          input_lists[i][0].shape.as_list() for i in range(4)
      ]
      return tuple(input_lists)
  def supports_datasets(self):
    """This preprocessor supports the tf.data input path."""
    return True
  def parse_and_preprocess(self, value, batch_position):
    """Parse an TFRecord."""
    del batch_position
    assert self.supports_datasets()
    context_features = {
        'labels': tf.VarLenFeature(dtype=tf.int64),
        'input_length': tf.FixedLenFeature([], dtype=tf.int64),
        'label_length': tf.FixedLenFeature([], dtype=tf.int64),
    }
    sequence_features = {
        'features': tf.FixedLenSequenceFeature([161], dtype=tf.float32)
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=value,
        context_features=context_features,
        sequence_features=sequence_features,
    )
    return [
        # Input
        tf.expand_dims(sequence_parsed['features'], axis=2),
        # Label
        tf.cast(
            tf.reshape(
                tf.sparse_tensor_to_dense(context_parsed['labels']), [-1]),
            dtype=tf.int32),
        # Input length
        tf.cast(
            tf.reshape(context_parsed['input_length'], [1]),
            dtype=tf.int32),
        # Label length
        tf.cast(
            tf.reshape(context_parsed['label_length'], [1]),
            dtype=tf.int32),
    ]
#!/bin/bash
# Environment setup and launch script for tf_cnn_benchmarks (ResNet-50) on a
# DCU/ROCm cluster using the DTK 21.10.1 toolkit with TF 1.15.
source /public/home/qianyj/virtualenv/dtk21.10.1/dtk21.10.1_tf1.15/venv/bin/activate
# Root of the DTK/ROCm installation; the paths below derive from it.
export ROCM_PATH=/public/home/qianyj/package/dtk-21.10.1/dtk-21.10.1
export HIP_PATH=${ROCM_PATH}/hip
# NOTE(review): 'CPACK_INSTLL_PREFIX' looks like a typo for
# 'CPACK_INSTALL_PREFIX' -- confirm whether anything reads the misspelled
# name before renaming it.
export CPACK_INSTLL_PREFIX=$ROCM_PATH
export AMDGPU_TARGETS="gfx900;gfx906"
# Put the ROCm/LLVM/HIP toolchains on PATH and their libraries on the loader
# and compiler include search paths.
export PATH=${ROCM_PATH}/bin:${ROCM_PATH}/llvm/bin:${ROCM_PATH}/hip/bin:$PATH
export LD_LIBRARY_PATH=${ROCM_PATH}/lib:${ROCM_PATH}/lib64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=${ROCM_PATH}/hip/lib:${ROCM_PATH}/llvm/lib:$LD_LIBRARY_PATH
export C_INCLUDE_PATH=${ROCM_PATH}/include:${ROCM_PATH}/llvm/include${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}}
export CPLUS_INCLUDE_PATH=${ROCM_PATH}/include:${ROCM_PATH}/llvm/include${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}}
# Runtime tuning: fine-grained PCIe memory, MIOpen find mode 3, and verbose
# TensorFlow C++ logging.
export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=3
export TF_CPP_MIN_VLOG_LEVEL=2
# Launch 4-GPU replicated training in the background, NUMA-pinned to nodes
# 0-3; stdout and stderr are redirected to ./logfile.
HIP_VISIBLE_DEVICES=0,1,2,3 numactl --cpunodebind=0,1,2,3 --membind=0,1,2,3 nohup python3 tf_cnn_benchmarks.py --data_format=NCHW --batch_size=128 --model=resnet50 --save_model_steps=20000 --optimizer=momentum --variable_update=replicated --print_training_accuracy=true --eval_during_training_every_n_epochs=1 --nodistortions --num_gpus=4 --num_epochs=90 --weight_decay=1e-4 --data_dir=/public/software/apps/DeepLearning/Data/ImageNet-tensorflow/ --use_fp16=False --data_name=imagenet --train_dir=/public/home/qianyj/TF_test/dtk21.10.1/tf1.15/benchmarks-master/scripts/checkpoint >logfile 2>&1 &
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs the tf_cnn_benchmarks tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import unittest
from absl import app
from absl import flags as absl_flags
import tensorflow.compat.v1 as tf
import all_reduce_benchmark_test
import allreduce_test
import benchmark_cnn_distributed_test
import benchmark_cnn_test
import cnn_util_test
import variable_mgr_util_test
from models import model_config
# Ideally, we wouldn't need this option, and run both distributed tests and non-
# distributed tests. But, TensorFlow allocates all the GPU memory by default, so
# the non-distributed tests allocate all the GPU memory. The distributed tests
# spawn processes that run TensorFlow, and cannot run if all the GPU memory is
# already allocated. If a non-distributed test is run, then a distributed test
# is run in the same process, the distributed test will fail because there is no
# more GPU memory for the spawned processes to allocate.
absl_flags.DEFINE_boolean('run_distributed_tests', False,
                          # Fixed missing trailing space: the two literals are
                          # concatenated, and previously rendered as
                          # "...If False, thenon-distributed tests." in --help.
                          'If True, run the distributed tests. If False, the '
                          'non-distributed tests.')
absl_flags.DEFINE_boolean('full_tests', False,
                          'If True, all distributed or non-distributed tests '
                          'are run, which can take hours. If False, only a '
                          'subset of tests will be run. This subset runs much '
                          'faster and tests almost all the functionality as '
                          'the full set of tests, so it is recommended to keep '
                          'this option set to False.')
FLAGS = absl_flags.FLAGS
def main(_):
  """Builds the requested unittest suite, runs it, and exits with its status."""
  loader = unittest.defaultTestLoader
  if FLAGS.full_tests:
    # Full mode: load every test module (can take hours).
    full_modules = (
        allreduce_test,
        cnn_util_test,
        variable_mgr_util_test,
        benchmark_cnn_test,
        all_reduce_benchmark_test,
    )
    suite = unittest.TestSuite(
        loader.loadTestsFromModule(module) for module in full_modules)
    if model_config.can_import_contrib:
      from models.tf1_only import nasnet_test  # pylint: disable=g-import-not-at-top
      suite.addTest(loader.loadTestsFromModule(nasnet_test))
    dist_suite = unittest.TestSuite(
        [loader.loadTestsFromModule(benchmark_cnn_distributed_test)])
  else:
    # Fast mode: a representative subset of modules and test cases.
    suite = unittest.TestSuite()
    for module in (allreduce_test, cnn_util_test, all_reduce_benchmark_test,
                   variable_mgr_util_test):
      suite.addTest(loader.loadTestsFromModule(module))
    for case in (benchmark_cnn_test.TestAlexnetModel,
                 benchmark_cnn_test.TfCnnBenchmarksTest,
                 benchmark_cnn_test.VariableUpdateTest,
                 benchmark_cnn_test.VariableMgrLocalReplicatedTest):
      suite.addTest(loader.loadTestsFromTestCase(case))
    dist_suite = unittest.TestSuite([
        loader.loadTestsFromNames([
            'benchmark_cnn_distributed_test.DistributedVariableUpdateTest'
            '.testVarUpdateDefault',
            'benchmark_cnn_distributed_test.TfCnnBenchmarksDistributedTest'
            '.testParameterServer',
        ]),
    ])
  # Select which suite to run; distributed tests spawn their own processes.
  if FLAGS.run_distributed_tests:
    print('Running distributed tests')
    chosen_suite = dist_suite
  else:
    print('Running non-distributed tests')
    chosen_suite = suite
  result = unittest.TextTestRunner(verbosity=2).run(chosen_suite)
  # Exit code 0 on success, 1 on any failure/error.
  sys.exit(not result.wasSuccessful())
if __name__ == '__main__':
  # The benchmark code targets TF1 graph-mode semantics, so disable TF2
  # behavior before absl's app.run dispatches to main().
  tf.disable_v2_behavior()
  app.run(main)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment