Commit 6b6f8b0c authored by huchen

del tensorflow benchmark cls

parent 4749cd5e
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Densenet model configuration.
References:
"Densely Connected Convolutional Networks": https://arxiv.org/pdf/1608.06993
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
from models import model as model_lib
class DensenetCifar10Model(model_lib.CNNModel):
"""Densenet cnn network configuration."""
def __init__(self, model, layer_counts, growth_rate, params=None):
self.growth_rate = growth_rate
super(DensenetCifar10Model, self).__init__(
model, 32, 64, 0.1, layer_counts=layer_counts, params=params)
self.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
def dense_block(self, cnn, growth_rate):
input_layer = cnn.top_layer
c = cnn.batch_norm(input_layer, **self.batch_norm_config)
c = tf.nn.relu(c)
c = cnn.conv(growth_rate, 3, 3, 1, 1, stddev=np.sqrt(2.0/9/growth_rate),
activation=None, input_layer=c)
channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
cnn.top_layer = tf.concat([input_layer, c], channel_index)
cnn.top_size += growth_rate
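# For illustration: densenet40_k12 enters Block 1 with 16 channels from the
# initial conv, and each dense_block call concatenates growth_rate=12 new
# channels, so after the 12 blocks of Block 1 the layer carries
# 16 + 12 * 12 = 160 channels.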
def transition_layer(self, cnn):
in_size = cnn.top_size
cnn.batch_norm(**self.batch_norm_config)
cnn.top_layer = tf.nn.relu(cnn.top_layer)
cnn.conv(in_size, 1, 1, 1, 1, stddev=np.sqrt(2.0/9/in_size))
cnn.apool(2, 2, 2, 2)
def add_inference(self, cnn):
if self.layer_counts is None:
raise ValueError('Layer counts not specified for %s' % self.get_model_name())
if self.growth_rate is None:
raise ValueError('Growth rate not specified for %s' % self.get_model_name())
cnn.conv(16, 3, 3, 1, 1, activation=None)
# Block 1
for _ in xrange(self.layer_counts[0]):
self.dense_block(cnn, self.growth_rate)
self.transition_layer(cnn)
# Block 2
for _ in xrange(self.layer_counts[1]):
self.dense_block(cnn, self.growth_rate)
self.transition_layer(cnn)
# Block 3
for _ in xrange(self.layer_counts[2]):
self.dense_block(cnn, self.growth_rate)
cnn.batch_norm(**self.batch_norm_config)
cnn.top_layer = tf.nn.relu(cnn.top_layer)
channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
cnn.top_size = cnn.top_layer.get_shape().as_list()[channel_index]
cnn.spatial_mean()
def get_learning_rate(self, global_step, batch_size):
num_batches_per_epoch = 50000 // batch_size
boundaries = num_batches_per_epoch * np.array([150, 225, 300],
dtype=np.int64)
boundaries = list(boundaries)
values = [0.1, 0.01, 0.001, 0.0001]
return tf.train.piecewise_constant(global_step, boundaries, values)
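# For illustration (assuming the default batch size of 64): 50000 // 64 = 781
# batches per epoch, so the learning rate steps down from 0.1 to 0.01, 0.001
# and 0.0001 at global steps 117150, 175725 and 234300 (epochs 150, 225, 300).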
def create_densenet40_k12_model():
return DensenetCifar10Model('densenet40_k12', (12, 12, 12), 12)
def create_densenet100_k12_model():
return DensenetCifar10Model('densenet100_k12', (32, 32, 32), 12)
def create_densenet100_k24_model():
return DensenetCifar10Model('densenet100_k24', (32, 32, 32), 24)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DeepSpeech2 model configuration.
References:
https://arxiv.org/abs/1512.02595
Deep Speech 2: End-to-End Speech Recognition in English and Mandarin
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import itertools
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
import constants
from cnn_util import log_fn
from models import model as model_lib
from tensorflow.python.ops import variables # pylint: disable=g-direct-tensorflow-import
class DeepSpeechDecoder(object):
"""Greedy decoder implementation for Deep Speech model."""
def __init__(self, labels, blank_index=28):
"""Decoder initialization.
Arguments:
labels: a string specifying the speech labels for the decoder to use.
blank_index: an integer specifying index for the blank character. Defaults
to 28.
"""
self.labels = labels
self.blank_index = blank_index
self.int_to_char = dict(enumerate(labels))
def convert_to_string(self, sequence):
"""Convert a sequence of indexes into corresponding string."""
return ''.join([self.int_to_char[i] for i in sequence])
def wer(self, decode, target):
"""Computes the Word Error Rate (WER).
WER is defined as the edit distance between the two provided sentences after
tokenizing to words.
Args:
decode: string of the decoded output.
target: a string for the ground truth label.
Returns:
A float number for the WER of the current decode-target pair.
"""
try:
from nltk.metrics import distance # pylint: disable=g-import-not-at-top
except ImportError as e:
if 'nltk.metrics' not in str(e):  # e.message is Python 2 only.
raise
raise ImportError('To use the experimental deepspeech model, you must '
'pip install -U nltk')
# Map each word to a new char.
words = set(decode.split() + target.split())
word2char = dict(zip(words, range(len(words))))
new_decode = [chr(word2char[w]) for w in decode.split()]
new_target = [chr(word2char[w]) for w in target.split()]
return distance.edit_distance(''.join(new_decode), ''.join(new_target))
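# Example with hypothetical inputs: wer('hi world', 'hi there') maps the three
# distinct words to single characters and returns an edit distance of 1;
# postprocess() later normalizes this by the number of words in the target.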
def cer(self, decode, target):
"""Computes the Character Error Rate (CER).
CER is defined as the edit distance between the two given strings.
Args:
decode: a string of the decoded output.
target: a string for the ground truth label.
Returns:
A float number denoting the CER for the current sentence pair.
"""
try:
from nltk.metrics import distance # pylint: disable=g-import-not-at-top
except ImportError as e:
if 'nltk.metrics' not in str(e):  # e.message is Python 2 only.
raise
raise ImportError('To use the experimental deepspeech model, you must '
'pip install -U nltk')
return distance.edit_distance(decode, target)
def decode(self, char_indexes):
"""Decode the best guess from logits using greedy algorithm."""
# Merge repeated chars.
merge = [k for k, _ in itertools.groupby(char_indexes)]
# Remove the blank index in the decoded sequence.
merge_remove_blank = []
for k in merge:
if k != self.blank_index:
merge_remove_blank.append(k)
return self.convert_to_string(merge_remove_blank)
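# Example: with the speech labels " abcdefghijklmnopqrstuvwxyz'-" used in
# postprocess() and blank_index=28, decode([8, 8, 28, 5, 5]) merges the
# repeats to [8, 28, 5], drops the blank and returns 'he'.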
def decode_logits(self, logits):
"""Decode the best guess from logits using greedy algorithm."""
# Choose the class with maximum probability.
best = list(np.argmax(logits, axis=1))
return self.decode(best)
class DeepSpeech2Model(model_lib.Model):
"""Define DeepSpeech2 model."""
# Supported rnn cells.
SUPPORTED_RNNS = {
'lstm': tf.nn.rnn_cell.BasicLSTMCell,
'rnn': tf.nn.rnn_cell.BasicRNNCell,  # RNNCell itself is abstract.
'gru': tf.nn.rnn_cell.GRUCell,
}
# Parameters for batch normalization.
BATCH_NORM_EPSILON = 1e-5
BATCH_NORM_DECAY = 0.997
# Filters of convolution layer
CONV_FILTERS = 32
def __init__(self,
num_rnn_layers=5,
rnn_type='lstm',
is_bidirectional=True,
rnn_hidden_size=800,
use_bias=True,
params=None):
"""Initialize DeepSpeech2 model.
Args:
num_rnn_layers: an integer, the number of rnn layers (default: 5).
rnn_type: a string, one of the supported rnn cells: gru, rnn or lstm.
is_bidirectional: a boolean to indicate if the rnn layer is bidirectional.
rnn_hidden_size: an integer for the number of hidden units in the RNN
cell.
use_bias: a boolean specifying whether to use a bias in the last fc layer.
params: the params from BenchmarkCNN.
"""
super(DeepSpeech2Model, self).__init__(
'deepspeech2',
batch_size=128,
learning_rate=0.0005,
fp16_loss_scale=128,
params=params)
self.num_rnn_layers = num_rnn_layers
self.rnn_type = rnn_type
self.is_bidirectional = is_bidirectional
self.rnn_hidden_size = rnn_hidden_size
self.use_bias = use_bias
self.num_feature_bins = 161
self.max_time_steps = 3494
self.max_label_length = 576
def _batch_norm(self, inputs, training):
"""Batch normalization layer.
Note that the choice of momentum will affect validation accuracy over time.
Batch norm has different behaviors during training/evaluation. With a large
momentum, the model takes longer to get a near-accurate estimation of the
moving mean/variance over the entire training dataset, which means we need
more iterations to see good evaluation results. If the training data is
evenly distributed over the feature space, we can also try setting a smaller
momentum (such as 0.1) to get good evaluation result sooner.
Args:
inputs: input data for batch norm layer.
training: a boolean to indicate if it is in training stage.
Returns:
tensor output from batch norm layer.
"""
return tf.layers.batch_normalization(
inputs=inputs,
momentum=DeepSpeech2Model.BATCH_NORM_DECAY,
epsilon=DeepSpeech2Model.BATCH_NORM_EPSILON,
fused=True,
training=training)
def _conv_bn_layer(self, inputs, padding, filters, kernel_size, strides,
layer_id, training):
"""Defines 2D convolutional + batch normalization layer.
Args:
inputs: input data for convolution layer.
padding: padding to be applied before convolution layer.
filters: an integer, number of output filters in the convolution.
kernel_size: a tuple specifying the height and width of the 2D convolution
window.
strides: a tuple specifying the stride length of the convolution.
layer_id: an integer specifying the layer index.
training: a boolean to indicate which stage we are in (training/eval).
Returns:
tensor output from the current layer.
"""
# Perform symmetric padding on the feature dimension of time_step
# This step is required to avoid issues when RNN output sequence is shorter
# than the label length.
inputs = tf.pad(
inputs,
[[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]])
inputs = tf.layers.conv2d(
inputs=inputs,
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding='valid',
use_bias=False,
activation=tf.nn.relu6,
name='cnn_{}'.format(layer_id))
return self._batch_norm(inputs, training)
def _rnn_layer(self, inputs, rnn_cell, rnn_hidden_size, layer_id,
use_batch_norm, is_bidirectional, training):
"""Defines a batch normalization + rnn layer.
Args:
inputs: input tensors for the current layer.
rnn_cell: RNN cell instance to use.
rnn_hidden_size: an integer for the dimensionality of the rnn output
space.
layer_id: an integer for the index of current layer.
use_batch_norm: a boolean specifying whether to perform batch
normalization on input states.
is_bidirectional: a boolean specifying whether the rnn layer is
bi-directional.
training: a boolean to indicate which stage we are in (training/eval).
Returns:
tensor output for the current layer.
"""
if use_batch_norm:
inputs = self._batch_norm(inputs, training)
# Construct forward/backward RNN cells.
fw_cell = rnn_cell(
num_units=rnn_hidden_size, name='rnn_fw_{}'.format(layer_id))
if is_bidirectional:
bw_cell = rnn_cell(
num_units=rnn_hidden_size, name='rnn_bw_{}'.format(layer_id))
outputs, _ = tf.nn.bidirectional_dynamic_rnn(
cell_fw=fw_cell,
cell_bw=bw_cell,
inputs=inputs,
dtype=tf.float32,
swap_memory=True)
rnn_outputs = tf.concat(outputs, -1)
else:
# dynamic_rnn returns an (outputs, state) pair; keep only the outputs.
rnn_outputs, _ = tf.nn.dynamic_rnn(
fw_cell, inputs, dtype=tf.float32, swap_memory=True)
return rnn_outputs
def get_input_data_types(self, subset):
"""Returns the list of data types of the inputs."""
del subset # Same data types for both train and validation subsets.
return [self.data_type, tf.int32, tf.int32, tf.int32]
def get_input_shapes(self, subset):
"""Returns the list of shapes of the padded inputs."""
del subset # Same shapes for both train and validation subsets
return [
[self.batch_size, self.max_time_steps, self.num_feature_bins, 1],
[self.batch_size, self.max_label_length],
[self.batch_size, 1],
[self.batch_size, 1],
]
def get_synthetic_inputs(self, input_name, nclass):
inputs = tf.random_uniform(self.get_input_shapes('train')[0],
dtype=self.get_input_data_types('train')[0])
inputs = variables.VariableV1(inputs, trainable=False,
collections=[tf.GraphKeys.LOCAL_VARIABLES],
name=input_name)
labels = tf.convert_to_tensor(
np.random.randint(28, size=[self.batch_size, self.max_label_length]))
input_lengths = tf.convert_to_tensor(
[self.max_time_steps] * self.batch_size)
label_lengths = tf.convert_to_tensor(
[self.max_label_length] * self.batch_size)
return [inputs, labels, input_lengths, label_lengths]
# TODO(laigd): support fp16.
# TODO(laigd): support multiple gpus.
def build_network(self, inputs, phase_train=True, nclass=29):
"""Builds the forward pass of the deepspeech2 model.
Args:
inputs: The input list of the model.
phase_train: True during training. False during evaluation.
nclass: Number of classes that the input spectrogram can belong to.
Returns:
A BuildNetworkResult which contains the logits and model-specific extra
information.
"""
inputs = inputs[0] # Get the spectrogram feature.
# Two cnn layers.
inputs = self._conv_bn_layer(
inputs,
padding=(20, 5),
filters=DeepSpeech2Model.CONV_FILTERS,
kernel_size=(41, 11),
strides=(2, 2),
layer_id=1,
training=phase_train)
inputs = self._conv_bn_layer(
inputs,
padding=(10, 5),
filters=DeepSpeech2Model.CONV_FILTERS,
kernel_size=(21, 11),
strides=(2, 1),
layer_id=2,
training=phase_train)
# output of conv_layer2 with the shape of
# [batch_size (N), times (T), features (F), channels (C)].
# Convert the conv output to rnn input.
feat_size = inputs.get_shape().as_list()[2]
inputs = tf.reshape(
inputs,
[self.batch_size, -1, feat_size * DeepSpeech2Model.CONV_FILTERS])
# RNN layers.
rnn_cell = DeepSpeech2Model.SUPPORTED_RNNS[self.rnn_type]
for layer_counter in xrange(self.num_rnn_layers):
# No batch normalization on the first layer.
use_batch_norm = (layer_counter != 0)
inputs = self._rnn_layer(inputs, rnn_cell, self.rnn_hidden_size,
layer_counter + 1, use_batch_norm,
self.is_bidirectional, phase_train)
# FC layer with batch norm.
inputs = self._batch_norm(inputs, phase_train)
logits = tf.layers.dense(inputs, nclass, use_bias=self.use_bias)
return model_lib.BuildNetworkResult(logits=logits, extra_info=None)
def loss_function(self, inputs, build_network_result):
"""Computes the ctc loss for the current batch of predictions.
Args:
inputs: the input list of the model.
build_network_result: a BuildNetworkResult returned by build_network().
Returns:
The loss tensor of the model.
"""
logits = build_network_result.logits
actual_time_steps = inputs[2]
probs = tf.nn.softmax(logits)
ctc_time_steps = tf.shape(probs)[1]
ctc_input_length = tf.to_float(
tf.multiply(actual_time_steps, ctc_time_steps))
ctc_input_length = tf.to_int32(
tf.floordiv(ctc_input_length, tf.to_float(self.max_time_steps)))
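# The two lines above rescale each utterance's true length from input frames
# to CTC frames. For illustration: with max_time_steps=3494 and, say,
# ctc_time_steps=874 after the stride-2 convolutions, an utterance of
# actual_time_steps=2000 maps to floor(2000 * 874 / 3494) = 500 CTC frames.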
label_length = inputs[3]
label_length = tf.to_int32(tf.squeeze(label_length))
ctc_input_length = tf.to_int32(tf.squeeze(ctc_input_length))
labels = inputs[1]
sparse_labels = tf.to_int32(
tf.keras.backend.ctc_label_dense_to_sparse(labels, label_length))
y_pred = tf.log(
tf.transpose(probs, perm=[1, 0, 2]) + tf.keras.backend.epsilon())
losses = tf.expand_dims(
tf.nn.ctc_loss(
labels=sparse_labels,
inputs=y_pred,
sequence_length=ctc_input_length,
ignore_longer_outputs_than_inputs=True),
axis=1)
loss = tf.reduce_mean(losses)
return loss
PROBABILITY_TENSOR = 'deepspeech2_prob'
LABEL_TENSOR = 'deepspeech2_label'
def accuracy_function(self, inputs, logits):
"""Returns the ops to evaluate the model performance."""
# Get probabilities of each predicted class
probs = tf.nn.softmax(logits)
assert probs.shape.as_list()[0] == self.batch_size
return {
(constants.UNREDUCED_ACCURACY_OP_PREFIX + self.PROBABILITY_TENSOR):
probs,
(constants.UNREDUCED_ACCURACY_OP_PREFIX + self.LABEL_TENSOR):
inputs[1],
}
def postprocess(self, results):
"""Postprocess results returned from model in Python."""
probs = results[self.PROBABILITY_TENSOR]
total_wer, total_cer = 0, 0
speech_labels = " abcdefghijklmnopqrstuvwxyz'-"
greedy_decoder = DeepSpeechDecoder(speech_labels)
# Evaluate the performance using WER (Word Error Rate) and CER (Character
# Error Rate) as metrics.
targets = results[self.LABEL_TENSOR] # The ground truth transcript
for i in range(self.batch_size):
# Decode string.
predicted_str = greedy_decoder.decode_logits(probs[i])
expected_str = greedy_decoder.decode(targets[i])
# Compute CER.
total_cer += (greedy_decoder.cer(predicted_str, expected_str) /
len(expected_str))
# Compute WER.
total_wer += (greedy_decoder.wer(predicted_str, expected_str) /
len(expected_str.split()))
# Get mean value
total_cer /= self.batch_size
total_wer /= self.batch_size
log_fn('total CER: {:f}; total WER: {:f}; total examples: {:d}.'.format(
total_cer, total_wer, self.batch_size))
# TODO(laigd): get rid of top_N_accuracy bindings in benchmark_cnn.py
return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrap the official recommendation model in a tf_cnn_benchmarks Model.
This allows the recommendation NCF model to be used in tf_cnn_benchmarks.
Currently, the implementation is fairly hacky, because tf_cnn_benchmarks is
intended to be used only with CNNs.
Only synthetic data with 1 GPU is currently supported.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
from models import model
# Obtained by running the official NCF model with the following command:
# python ncf_main.py --dataset ml-20m
# and printing the number of users and items here:
# https://github.com/tensorflow/models/blob/d089975f630a8a01be63e45ef08a31be14bb96b4/official/recommendation/data_preprocessing.py#L68
_NUM_USERS_20M = 138493
_NUM_ITEMS_20M = 26744
# TODO(reedwm): Support multi-GPU. Currently keras layers, which this model
# uses, ignore variable_scopes, which we rely on for multi-GPU support.
# TODO(reedwm): Support real data. This will require a significant refactor.
# TODO(reedwm): All-reduce IndexedSlices more effectively.
# TODO(reedwm): Support the 1M variant of this model.
class NcfModel(model.Model):
r"""A model.Model wrapper around the official NCF recommendation model.
To do an NCF run with synthetic data that roughly matches what the official
model does, run:
python tf_cnn_benchmarks.py --optimizer=adam --model=ncf --batch_size=65536 \
--weight_decay=0 --sparse_to_dense_grads
"""
def __init__(self, params=None):
super(NcfModel, self).__init__(
'official_ncf', batch_size=2048, learning_rate=0.0005,
fp16_loss_scale=128, params=params)
if self.fp16_vars:
raise ValueError('NCF model only supports float32 variables for now.')
def build_network(self, inputs, phase_train=True, nclass=1001):
try:
from official.recommendation import neumf_model # pylint: disable=g-import-not-at-top
except ImportError as e:
if 'neumf_model' not in str(e):  # e.message is Python 2 only.
raise
raise ImportError('To use the experimental NCF model, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models to the PYTHONPATH.')
del nclass
users, items, _ = inputs
params = {
'num_users': _NUM_USERS_20M,
'num_items': _NUM_ITEMS_20M,
'model_layers': (256, 256, 128, 64),
'mf_dim': 64,
'mf_regularization': 0,
'mlp_reg_layers': (0, 0, 0, 0),
'use_tpu': False
}
user_input = tf.keras.layers.Input(tensor=users, name='user_input')
item_input = tf.keras.layers.Input(tensor=items, name='item_input')
if self.data_type == tf.float32:
keras_model = neumf_model.construct_model(user_input, item_input, params)
logits = keras_model.output
else:
assert self.data_type == tf.float16
old_floatx = tf.keras.backend.floatx()
try:
tf.keras.backend.set_floatx('float16')
# We cannot rely on the variable_scope's fp16 custom getter here,
# because the NCF model uses keras layers, which ignore variable scopes.
# So we use a variable_creator_scope instead.
with tf.variable_creator_scope(_fp16_variable_creator):
keras_model = neumf_model.construct_model(user_input, item_input,
params)
logits = tf.cast(keras_model.output, tf.float32)
finally:
tf.keras.backend.set_floatx(old_floatx)
return model.BuildNetworkResult(logits=logits, extra_info=None)
def loss_function(self, inputs, build_network_result):
logits = build_network_result.logits
# Softmax with the first column of ones is equivalent to sigmoid.
# TODO(reedwm): Actually, the first column should be zeros to be equivalent
# to sigmoid. But, we keep it at ones to match the official models.
logits = tf.concat([tf.ones(logits.shape, dtype=logits.dtype), logits],
axis=1)
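# Concretely, softmax([1, x])[1] == sigmoid(x - 1), while a zeros column
# would give softmax([0, x])[1] == sigmoid(x) exactly.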
return tf.losses.sparse_softmax_cross_entropy(
labels=inputs[2],
logits=logits
)
def get_synthetic_inputs(self, input_name, nclass):
"""Returns the ops to generate synthetic inputs and labels."""
def users_init_val():
return tf.random_uniform((self.batch_size, 1), minval=0,
maxval=_NUM_USERS_20M, dtype=tf.int32)
users = tf.Variable(users_init_val, dtype=tf.int32, trainable=False,
collections=[tf.GraphKeys.LOCAL_VARIABLES],
name='synthetic_users')
def items_init_val():
return tf.random_uniform((self.batch_size, 1), minval=0,
maxval=_NUM_ITEMS_20M, dtype=tf.int32)
items = tf.Variable(items_init_val, dtype=tf.int32, trainable=False,
collections=[tf.GraphKeys.LOCAL_VARIABLES],
name='synthetic_items')
def labels_init_val():
return tf.random_uniform((self.batch_size,), minval=0, maxval=2,
dtype=tf.int32)
labels = tf.Variable(labels_init_val, dtype=tf.int32, trainable=False,
collections=[tf.GraphKeys.LOCAL_VARIABLES],
name='synthetic_labels')
return [users, items, labels]
def get_input_shapes(self, subset):
del subset
return [[self.batch_size, 1], [self.batch_size, 1], [self.batch_size]]
def get_input_data_types(self, subset):
del subset
return [tf.int32, tf.int32, tf.int32]
def _fp16_variable_creator(next_creator, **kwargs):
"""Variable creator to create variables in fp32 and cast them to fp16."""
dtype = kwargs.get('dtype', None)
initial_value = kwargs.get('initial_value', None)
if dtype is None:
if initial_value is not None and not callable(initial_value):
dtype = initial_value.dtype
if dtype == tf.float16:
if callable(initial_value):
new_initial_value = lambda: tf.cast(initial_value(), tf.float32)
else:
new_initial_value = tf.cast(initial_value, tf.float32)
kwargs['dtype'] = tf.float32
kwargs['initial_value'] = new_initial_value
var = next_creator(**kwargs)
return tf.cast(var, dtype=tf.float16)
else:
return next_creator(**kwargs)
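# A minimal usage sketch for the creator above (illustrative, not part of the
# benchmark code): inside the scope, a variable requested in fp16 is stored in
# fp32 and an fp16 cast of it is returned, keeping optimizer updates in full
# precision.
#
#   with tf.variable_creator_scope(_fp16_variable_creator):
#     w = tf.Variable(tf.zeros([4], dtype=tf.float16), name='w')
#   # 'w' behaves as an fp16 tensor backed by an fp32 variable.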
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Googlenet model configuration.
References:
Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich
Going deeper with convolutions
arXiv preprint arXiv:1409.4842 (2014)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from models import model
class GooglenetModel(model.CNNModel):
"""GoogLeNet."""
def __init__(self, params=None):
super(GooglenetModel, self).__init__(
'googlenet', 224, 32, 0.005, params=params)
def add_inference(self, cnn):
def inception_v1(cnn, k, l, m, n, p, q):
cols = [[('conv', k, 1, 1)], [('conv', l, 1, 1), ('conv', m, 3, 3)],
[('conv', n, 1, 1), ('conv', p, 5, 5)],
[('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)]]
cnn.inception_module('incept_v1', cols)
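# The column arguments map onto the paper's table: k is the 1x1 path, l and m
# the 3x3 reduce and 3x3 path, n and p the 5x5 reduce and 5x5 path, and q the
# pool projection. For example, inception_v1(cnn, 64, 96, 128, 16, 32, 32)
# reproduces inception (3a) with 64 + 128 + 32 + 32 = 256 output channels.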
cnn.conv(64, 7, 7, 2, 2)
cnn.mpool(3, 3, 2, 2, mode='SAME')
cnn.conv(64, 1, 1)
cnn.conv(192, 3, 3)
cnn.mpool(3, 3, 2, 2, mode='SAME')
inception_v1(cnn, 64, 96, 128, 16, 32, 32)
inception_v1(cnn, 128, 128, 192, 32, 96, 64)
cnn.mpool(3, 3, 2, 2, mode='SAME')
inception_v1(cnn, 192, 96, 208, 16, 48, 64)
inception_v1(cnn, 160, 112, 224, 24, 64, 64)
inception_v1(cnn, 128, 128, 256, 24, 64, 64)
inception_v1(cnn, 112, 144, 288, 32, 64, 64)
inception_v1(cnn, 256, 160, 320, 32, 128, 128)
cnn.mpool(3, 3, 2, 2, mode='SAME')
inception_v1(cnn, 256, 160, 320, 32, 128, 128)
inception_v1(cnn, 384, 192, 384, 48, 128, 128)
cnn.apool(7, 7, 1, 1, mode='VALID')
cnn.reshape([-1, 1024])
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Inception model configuration.
Includes multiple models: inception3, inception4, inception-resnet2.
References:
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
Inception-v4, Inception-ResNet and the Impact of Residual Connections on
Learning
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich
Going Deeper with Convolutions
http://arxiv.org/pdf/1409.4842v1.pdf
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
Zbigniew Wojna
Rethinking the Inception Architecture for Computer Vision
arXiv preprint arXiv:1512.00567 (2015)
Inception v3 model: http://arxiv.org/abs/1512.00567
Inception v4 and Resnet V2 architectures: http://arxiv.org/abs/1602.07261
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange # pylint: disable=redefined-builtin
from models import model
class Inceptionv3Model(model.CNNModel):
"""InceptionV3."""
def __init__(self, auxiliary=False, params=None):
self._auxiliary = auxiliary
super(Inceptionv3Model, self).__init__(
'inception3', 299, 32, 0.005, params=params)
def add_inference(self, cnn):
def inception_v3_a(cnn, n):
cols = [[('conv', 64, 1, 1)], [('conv', 48, 1, 1), ('conv', 64, 5, 5)],
[('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
[('apool', 3, 3, 1, 1, 'SAME'), ('conv', n, 1, 1)]]
cnn.inception_module('incept_v3_a', cols)
def inception_v3_b(cnn):
cols = [[('conv', 384, 3, 3, 2, 2, 'VALID')],
[('conv', 64, 1, 1),
('conv', 96, 3, 3),
('conv', 96, 3, 3, 2, 2, 'VALID')],
[('mpool', 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v3_b', cols)
def inception_v3_c(cnn, n):
cols = [[('conv', 192, 1, 1)],
[('conv', n, 1, 1), ('conv', n, 1, 7), ('conv', 192, 7, 1)],
[('conv', n, 1, 1), ('conv', n, 7, 1), ('conv', n, 1, 7),
('conv', n, 7, 1), ('conv', 192, 1, 7)],
[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)]]
cnn.inception_module('incept_v3_c', cols)
def inception_v3_d(cnn):
cols = [[('conv', 192, 1, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')],
[('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 192, 7, 1),
('conv', 192, 3, 3, 2, 2, 'VALID')],
[('mpool', 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v3_d', cols)
def inception_v3_e(cnn, pooltype):
cols = [[('conv', 320, 1, 1)], [('conv', 384, 1, 1), ('conv', 384, 1, 3)],
[('share',), ('conv', 384, 3, 1)],
[('conv', 448, 1, 1), ('conv', 384, 3, 3), ('conv', 384, 1, 3)],
[('share',), ('share',), ('conv', 384, 3, 1)],
[('mpool' if pooltype == 'max' else 'apool', 3, 3, 1, 1, 'SAME'),
('conv', 192, 1, 1)]]
cnn.inception_module('incept_v3_e', cols)
def incept_v3_aux(cnn):
assert cnn.aux_top_layer is None
cnn.aux_top_layer = cnn.top_layer
cnn.aux_top_size = cnn.top_size
with cnn.switch_to_aux_top_layer():
cnn.apool(5, 5, 3, 3, mode='VALID')
cnn.conv(128, 1, 1, mode='SAME')
cnn.conv(768, 5, 5, mode='VALID', stddev=0.01)
cnn.reshape([-1, 768])
cnn.use_batch_norm = True
cnn.conv(32, 3, 3, 2, 2, mode='VALID') # 299 x 299 x 3
cnn.conv(32, 3, 3, 1, 1, mode='VALID') # 149 x 149 x 32
cnn.conv(64, 3, 3, 1, 1, mode='SAME') # 147 x 147 x 64
cnn.mpool(3, 3, 2, 2, mode='VALID') # 147 x 147 x 64
cnn.conv(80, 1, 1, 1, 1, mode='VALID') # 73 x 73 x 80
cnn.conv(192, 3, 3, 1, 1, mode='VALID') # 71 x 71 x 192
cnn.mpool(3, 3, 2, 2, 'VALID') # 35 x 35 x 192
inception_v3_a(cnn, 32) # 35 x 35 x 256 mixed.
inception_v3_a(cnn, 64) # 35 x 35 x 288 mixed_1.
inception_v3_a(cnn, 64) # 35 x 35 x 288 mixed_2
inception_v3_b(cnn) # 17 x 17 x 768 mixed_3
inception_v3_c(cnn, 128) # 17 x 17 x 768 mixed_4
inception_v3_c(cnn, 160) # 17 x 17 x 768 mixed_5
inception_v3_c(cnn, 160) # 17 x 17 x 768 mixed_6
inception_v3_c(cnn, 192) # 17 x 17 x 768 mixed_7
if self._auxiliary:
incept_v3_aux(cnn) # Auxiliary head logits
inception_v3_d(cnn) # 17 x 17 x 1280 mixed_8
inception_v3_e(cnn, 'avg') # 8 x 8 x 2048 mixed_9
inception_v3_e(cnn, 'max') # 8 x 8 x 2048 mixed_10
cnn.apool(8, 8, 1, 1, 'VALID') # 8 x 8 x 2048
cnn.reshape([-1, 2048]) # 1 x 1 x 2048
# Stem functions
def inception_v4_sa(cnn):
cols = [[('mpool', 3, 3, 2, 2, 'VALID')], [('conv', 96, 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v4_sa', cols)
def inception_v4_sb(cnn):
cols = [[('conv', 64, 1, 1), ('conv', 96, 3, 3, 1, 1, 'VALID')],
[('conv', 64, 1, 1), ('conv', 64, 7, 1), ('conv', 64, 1, 7),
('conv', 96, 3, 3, 1, 1, 'VALID')]]
cnn.inception_module('incept_v4_sb', cols)
def inception_v4_sc(cnn):
cols = [[('conv', 192, 3, 3, 2, 2, 'VALID')],
[('mpool', 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v4_sc', cols)
# Reduction functions
def inception_v4_ra(cnn, k, l, m, n):
cols = [
[('mpool', 3, 3, 2, 2, 'VALID')], [('conv', n, 3, 3, 2, 2, 'VALID')],
[('conv', k, 1, 1), ('conv', l, 3, 3), ('conv', m, 3, 3, 2, 2, 'VALID')]
]
cnn.inception_module('incept_v4_ra', cols)
def inception_v4_rb(cnn):
cols = [[('mpool', 3, 3, 2, 2, 'VALID')],
[('conv', 192, 1, 1), ('conv', 192, 3, 3, 2, 2, 'VALID')],
[('conv', 256, 1, 1), ('conv', 256, 1, 7), ('conv', 320, 7, 1),
('conv', 320, 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v4_rb', cols)
class Inceptionv4Model(model.CNNModel):
"""Inceptionv4."""
def __init__(self, params=None):
super(Inceptionv4Model, self).__init__(
'inception4', 299, 32, 0.005, params=params)
def add_inference(self, cnn):
def inception_v4_a(cnn):
cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 96, 1, 1)],
[('conv', 96, 1, 1)], [('conv', 64, 1, 1), ('conv', 96, 3, 3)],
[('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)]]
cnn.inception_module('incept_v4_a', cols)
def inception_v4_b(cnn):
cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 128, 1, 1)],
[('conv', 384, 1, 1)],
[('conv', 192, 1, 1), ('conv', 224, 1, 7), ('conv', 256, 7, 1)],
[('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 224, 7, 1),
('conv', 224, 1, 7), ('conv', 256, 7, 1)]]
cnn.inception_module('incept_v4_b', cols)
def inception_v4_c(cnn):
cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 256, 1, 1)],
[('conv', 256, 1, 1)], [('conv', 384, 1, 1), ('conv', 256, 1, 3)],
[('share',), ('conv', 256, 3, 1)],
[('conv', 384, 1, 1), ('conv', 448, 1, 3), ('conv', 512, 3, 1),
('conv', 256, 3, 1)], [('share',), ('share',), ('share',),
('conv', 256, 1, 3)]]
cnn.inception_module('incept_v4_c', cols)
cnn.use_batch_norm = True
cnn.conv(32, 3, 3, 2, 2, mode='VALID')
cnn.conv(32, 3, 3, 1, 1, mode='VALID')
cnn.conv(64, 3, 3)
inception_v4_sa(cnn)
inception_v4_sb(cnn)
inception_v4_sc(cnn)
for _ in xrange(4):
inception_v4_a(cnn)
inception_v4_ra(cnn, 192, 224, 256, 384)
for _ in xrange(7):
inception_v4_b(cnn)
inception_v4_rb(cnn)
for _ in xrange(3):
inception_v4_c(cnn)
cnn.spatial_mean()
cnn.dropout(0.8)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Lenet model configuration.
References:
LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner
Gradient-based learning applied to document recognition
Proceedings of the IEEE (1998)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from models import model
class Lenet5Model(model.CNNModel):
"""Lenet5."""
def __init__(self, params=None):
super(Lenet5Model, self).__init__('lenet5', 28, 32, 0.005, params=params)
def add_inference(self, cnn):
# Note: This matches TF's MNIST tutorial model
cnn.conv(32, 5, 5)
cnn.mpool(2, 2)
cnn.conv(64, 5, 5)
cnn.mpool(2, 2)
cnn.reshape([-1, 64 * 7 * 7])
cnn.affine(512)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base model configuration for CNN benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import namedtuple
import tensorflow.compat.v1 as tf
import convnet_builder
import mlperf
from tensorflow.python.ops import variables as variables_module # pylint: disable=g-direct-tensorflow-import
# BuildNetworkResult encapsulates the result (e.g. logits) of a
# Model.build_network() call.
BuildNetworkResult = namedtuple(
'BuildNetworkResult',
[
'logits', # logits of the network
'extra_info', # Model specific extra information
])
class Model(object):
"""Base model config for DNN benchmarks."""
def __init__(self,
model_name,
batch_size,
learning_rate,
fp16_loss_scale,
params=None):
self.model_name = model_name
self.batch_size = batch_size
self.default_batch_size = batch_size
self.learning_rate = learning_rate
# TODO(reedwm) Set custom loss scales for each model instead of using the
# default of 128.
self.fp16_loss_scale = fp16_loss_scale
# use_tf_layers specifies whether to build the model using tf.layers.
# fp16_vars specifies whether to create the variables in float16.
if params:
self.use_tf_layers = params.use_tf_layers
self.fp16_vars = params.fp16_vars
self.data_type = tf.float16 if params.use_fp16 else tf.float32
else:
self.use_tf_layers = True
self.fp16_vars = False
self.data_type = tf.float32
def get_model_name(self):
return self.model_name
def get_batch_size(self):
return self.batch_size
def set_batch_size(self, batch_size):
self.batch_size = batch_size
def get_default_batch_size(self):
return self.default_batch_size
def get_fp16_loss_scale(self):
return self.fp16_loss_scale
def filter_l2_loss_vars(self, variables):
"""Filters out variables that the L2 loss should not be computed for.
By default, this filters out batch normalization variables and keeps all
other variables. This behavior can be overridden by subclasses.
Args:
variables: A list of the trainable variables.
Returns:
A list of variables that the L2 loss should be computed for.
"""
mlperf.logger.log(key=mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2,
value=True)
return [v for v in variables if 'batchnorm' not in v.name]
def get_learning_rate(self, global_step, batch_size):
del global_step
del batch_size
return self.learning_rate
def get_input_shapes(self, subset):
"""Returns the list of expected shapes of all the inputs to this model."""
del subset
raise NotImplementedError('Must be implemented in derived classes')
def get_input_data_types(self, subset):
"""Returns the list of data types of all the inputs to this model."""
del subset
raise NotImplementedError('Must be implemented in derived classes')
def get_synthetic_inputs(self, input_name, nclass):
"""Returns the ops to generate synthetic inputs."""
raise NotImplementedError('Must be implemented in derived classes')
def build_network(self, inputs, phase_train, nclass):
"""Builds the forward pass of the model.
Args:
inputs: The list of inputs, including labels
phase_train: True during training. False during evaluation.
nclass: Number of classes that the inputs can belong to.
Returns:
A BuildNetworkResult which contains the logits and model-specific extra
information.
"""
raise NotImplementedError('Must be implemented in derived classes')
def loss_function(self, inputs, build_network_result):
"""Returns the op to measure the loss of the model.
Args:
inputs: the input list of the model.
build_network_result: a BuildNetworkResult returned by build_network().
Returns:
The loss tensor of the model.
"""
raise NotImplementedError('Must be implemented in derived classes')
# TODO(laigd): have accuracy_function() take build_network_result instead.
def accuracy_function(self, inputs, logits):
"""Returns the ops to measure the accuracy of the model."""
raise NotImplementedError('Must be implemented in derived classes')
def postprocess(self, results):
"""Postprocess results returned from model in Python."""
return results
def reached_target(self):
"""Define custom methods to stop training when model's target is reached."""
return False
class CNNModel(Model):
"""Base model configuration for CNN benchmarks."""
# TODO(laigd): reduce the number of parameters and read everything from
# params.
def __init__(self,
model,
image_size,
batch_size,
learning_rate,
layer_counts=None,
fp16_loss_scale=128,
params=None):
super(CNNModel, self).__init__(
model, batch_size, learning_rate, fp16_loss_scale,
params=params)
self.image_size = image_size
self.layer_counts = layer_counts
self.depth = 3
self.params = params
self.data_format = params.data_format if params else 'NCHW'
def get_layer_counts(self):
return self.layer_counts
def skip_final_affine_layer(self):
"""Returns if the caller of this class should skip the final affine layer.
Normally, this class adds a final affine layer to the model after calling
self.add_inference(), to generate the logits. If a subclass override this
method to return True, the caller should not add the final affine layer.
This is useful for tests.
"""
return False
def add_backbone_saver(self):
"""Creates a tf.train.Saver as self.backbone_saver for loading backbone.
A tf.train.Saver must be created and saved in self.backbone_saver before
calling load_backbone_model, with a variable name mapping that loads
checkpoint variables correctly into the current model.
"""
raise NotImplementedError(self.get_model_name() + ' does not have backbone model.')
def load_backbone_model(self, sess, backbone_model_path):
"""Loads variable values from a pre-trained backbone model.
This should be used at the beginning of the training process for transfer
learning models using checkpoints of base models.
Args:
sess: session to train the model.
backbone_model_path: path to backbone model checkpoint file.
"""
del sess, backbone_model_path
raise NotImplementedError(self.get_model_name() + ' does not have backbone model.')
def add_inference(self, cnn):
"""Adds the core layers of the CNN's forward pass.
This should build the forward pass layers, except for the initial transpose
of the images and the final Dense layer producing the logits. The layers
should be built with the ConvNetBuilder `cnn`, so that when this function
returns, `cnn.top_layer` and `cnn.top_size` refer to the last layer and the
number of units of the last layer, respectively.
Args:
cnn: A ConvNetBuilder to build the forward pass layers with.
"""
del cnn
raise NotImplementedError('Must be implemented in derived classes')
def get_input_data_types(self, subset):
"""Return data types of inputs for the specified subset."""
del subset # Same types for both 'train' and 'validation' subsets.
return [self.data_type, tf.int32]
def get_input_shapes(self, subset):
"""Return data shapes of inputs for the specified subset."""
del subset # Same shapes for both 'train' and 'validation' subsets.
# Each input is of shape [batch_size, height, width, depth]
# Each label is of shape [batch_size]
return [[self.batch_size, self.image_size, self.image_size, self.depth],
[self.batch_size]]
def get_synthetic_inputs(self, input_name, nclass):
# Synthetic input should be within [0, 255].
image_shape, label_shape = self.get_input_shapes('train')
inputs = tf.truncated_normal(
image_shape,
dtype=self.data_type,
mean=127,
stddev=60,
name=self.model_name + '_synthetic_inputs')
inputs = variables_module.VariableV1(
inputs, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES],
name=input_name)
labels = tf.random_uniform(
label_shape,
minval=0,
maxval=nclass - 1,
dtype=tf.int32,
name=self.model_name + '_synthetic_labels')
return (inputs, labels)
def gpu_preprocess_nhwc(self, images, phase_train=True):
del phase_train
return images
def build_network(self,
inputs,
phase_train=True,
nclass=1001):
"""Returns logits from input images.
Args:
inputs: The input images and labels
phase_train: True during training. False during evaluation.
nclass: Number of classes that the images can belong to.
Returns:
A BuildNetworkResult which contains the logits and model-specific extra
information.
"""
images = inputs[0]
images = self.gpu_preprocess_nhwc(images, phase_train)
if self.data_format == 'NCHW':
images = tf.transpose(images, [0, 3, 1, 2])
var_type = tf.float32
if self.data_type == tf.float16 and self.fp16_vars:
var_type = tf.float16
network = convnet_builder.ConvNetBuilder(
images, self.depth, phase_train, self.use_tf_layers, self.data_format,
self.data_type, var_type)
with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
self.add_inference(network)
# Add the final fully-connected class layer
logits = (
network.affine(nclass, activation='linear')
if not self.skip_final_affine_layer() else network.top_layer)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_FINAL_SHAPE,
value=logits.shape.as_list()[1:])
aux_logits = None
if network.aux_top_layer is not None:
with network.switch_to_aux_top_layer():
aux_logits = network.affine(nclass, activation='linear', stddev=0.001)
if self.data_type == tf.float16:
# TODO(reedwm): Determine if we should do this cast here.
logits = tf.cast(logits, tf.float32)
if aux_logits is not None:
aux_logits = tf.cast(aux_logits, tf.float32)
return BuildNetworkResult(
logits=logits, extra_info=None if aux_logits is None else aux_logits)
def loss_function(self, inputs, build_network_result):
"""Returns the op to measure the loss of the model."""
logits = build_network_result.logits
_, labels = inputs
# TODO(laigd): consider putting the aux logit in the Inception model,
# which could call super.loss_function twice, once with the normal logits
# and once with the aux logits.
aux_logits = build_network_result.extra_info
with tf.name_scope('xentropy'):
mlperf.logger.log(key=mlperf.tags.MODEL_HP_LOSS_FN, value=mlperf.tags.CCE)
cross_entropy = tf.losses.sparse_softmax_cross_entropy(
logits=logits, labels=labels)
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
if aux_logits is not None:
with tf.name_scope('aux_xentropy'):
aux_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
logits=aux_logits, labels=labels)
aux_loss = 0.4 * tf.reduce_mean(aux_cross_entropy, name='aux_loss')
loss = tf.add_n([loss, aux_loss])
return loss
def accuracy_function(self, inputs, logits):
"""Returns the ops to measure the accuracy of the model."""
_, labels = inputs
top_1_op = tf.reduce_sum(
tf.cast(tf.nn.in_top_k(logits, labels, 1), self.data_type))
top_5_op = tf.reduce_sum(
tf.cast(tf.nn.in_top_k(logits, labels, 5), self.data_type))
return {'top_1_accuracy': top_1_op, 'top_5_accuracy': top_5_op}
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Model configurations for CNN benchmarks.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from functools import partial
from models import alexnet_model
from models import densenet_model
from models import googlenet_model
from models import inception_model
from models import lenet_model
from models import official_resnet_model
from models import overfeat_model
from models import resnet_model
from models import trivial_model
from models import vgg_model
from models.experimental import deepspeech
from models.experimental import official_ncf_model
_model_name_to_imagenet_model = {
'vgg11': vgg_model.Vgg11Model,
'vgg16': vgg_model.Vgg16Model,
'vgg19': vgg_model.Vgg19Model,
'lenet': lenet_model.Lenet5Model,
'googlenet': googlenet_model.GooglenetModel,
'overfeat': overfeat_model.OverfeatModel,
'alexnet': alexnet_model.AlexnetModel,
'trivial': trivial_model.TrivialModel,
'inception3': inception_model.Inceptionv3Model,
'inception4': inception_model.Inceptionv4Model,
'official_resnet18_v2':
partial(official_resnet_model.ImagenetResnetModel, 18),
'official_resnet34_v2':
partial(official_resnet_model.ImagenetResnetModel, 34),
'official_resnet50_v2':
partial(official_resnet_model.ImagenetResnetModel, 50),
'official_resnet101_v2':
partial(official_resnet_model.ImagenetResnetModel, 101),
'official_resnet152_v2':
partial(official_resnet_model.ImagenetResnetModel, 152),
'official_resnet200_v2':
partial(official_resnet_model.ImagenetResnetModel, 200),
'official_resnet18':
partial(official_resnet_model.ImagenetResnetModel, 18, version=1),
'official_resnet34':
partial(official_resnet_model.ImagenetResnetModel, 34, version=1),
'official_resnet50':
partial(official_resnet_model.ImagenetResnetModel, 50, version=1),
'official_resnet101':
partial(official_resnet_model.ImagenetResnetModel, 101, version=1),
'official_resnet152':
partial(official_resnet_model.ImagenetResnetModel, 152, version=1),
'official_resnet200':
partial(official_resnet_model.ImagenetResnetModel, 200, version=1),
'resnet50': resnet_model.create_resnet50_model,
'resnet50_v1.5': resnet_model.create_resnet50_v1_5_model,
'resnet50_v2': resnet_model.create_resnet50_v2_model,
'resnet101': resnet_model.create_resnet101_model,
'resnet101_v2': resnet_model.create_resnet101_v2_model,
'resnet152': resnet_model.create_resnet152_model,
'resnet152_v2': resnet_model.create_resnet152_v2_model,
'ncf': official_ncf_model.NcfModel,
}
_model_name_to_cifar_model = {
'alexnet': alexnet_model.AlexnetCifar10Model,
'resnet20': resnet_model.create_resnet20_cifar_model,
'resnet20_v2': resnet_model.create_resnet20_v2_cifar_model,
'resnet32': resnet_model.create_resnet32_cifar_model,
'resnet32_v2': resnet_model.create_resnet32_v2_cifar_model,
'resnet44': resnet_model.create_resnet44_cifar_model,
'resnet44_v2': resnet_model.create_resnet44_v2_cifar_model,
'resnet56': resnet_model.create_resnet56_cifar_model,
'resnet56_v2': resnet_model.create_resnet56_v2_cifar_model,
'resnet110': resnet_model.create_resnet110_cifar_model,
'resnet110_v2': resnet_model.create_resnet110_v2_cifar_model,
'trivial': trivial_model.TrivialCifar10Model,
'densenet40_k12': densenet_model.create_densenet40_k12_model,
'densenet100_k12': densenet_model.create_densenet100_k12_model,
'densenet100_k24': densenet_model.create_densenet100_k24_model,
}
_model_name_to_object_detection_model = {
'trivial': trivial_model.TrivialSSD300Model,
}
def _get_model_map(dataset_name):
"""Get name to model map for specified dataset."""
if dataset_name == 'cifar10':
return _model_name_to_cifar_model
elif dataset_name in ('imagenet', 'synthetic'):
return _model_name_to_imagenet_model
elif dataset_name == 'librispeech':
return {'deepspeech2': deepspeech.DeepSpeech2Model}
elif dataset_name == 'coco':
return _model_name_to_object_detection_model
else:
raise ValueError('Invalid dataset name: %s' % dataset_name)
# A model map dict can have this string as a value when TF2 is used, to indicate
# the model is only available in TF1.
_TF1_ONLY_STRING = 'TF1_ONLY'
def get_model_config(model_name, dataset, params):
"""Map model name to model network configuration."""
model_map = _get_model_map(dataset.name)
if model_name not in model_map:
raise ValueError('Invalid model name \'%s\' for dataset \'%s\'' %
(model_name, dataset.name))
model = model_map[model_name]
if model == _TF1_ONLY_STRING:
raise ValueError('Model \'%s\' can only be used with TensorFlow 1'
% (model_name,))
return model(params=params)
def register_model(model_name, dataset_name, model_func):
"""Register a new model that can be obtained with `get_model_config`."""
model_map = _get_model_map(dataset_name)
if model_name in model_map:
raise ValueError('Model "%s" is already registered for dataset "%s"' %
(model_name, dataset_name))
model_map[model_name] = model_func
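# Example with a hypothetical model class: after
#   register_model('mycnn', 'imagenet', MyCnnModel)
# a later get_model_config('mycnn', imagenet_dataset, params) call returns
# MyCnnModel(params=params).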
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import slim # pylint: disable=unused-import
can_import_contrib = True
except ImportError:
can_import_contrib = False
def register_tf1_models():
"""Registers all the TensorFlow 1-only models.
TF 1-only models use contrib, which was removed in TF 2. If contrib can be
imported, the TF 1-only models are registered normally. If contrib cannot be
imported, the models are registered with the 'TF1_ONLY' string instead, which
will cause an error to be thrown if these models are used.
"""
if can_import_contrib:
from models.tf1_only import mobilenet_v2
from models.tf1_only import nasnet_model
from models.tf1_only import ssd_model
register_model('mobilenet', 'imagenet', mobilenet_v2.MobilenetModel)
register_model('nasnet', 'imagenet', nasnet_model.NasnetModel)
register_model('nasnetlarge', 'imagenet', nasnet_model.NasnetLargeModel)
register_model('nasnet', 'cifar10', nasnet_model.NasnetCifarModel)
register_model('ssd300', 'coco', ssd_model.SSD300Model)
else:
register_model('mobilenet', 'imagenet', _TF1_ONLY_STRING)
register_model('nasnet', 'imagenet', _TF1_ONLY_STRING)
register_model('nasnetlarge', 'imagenet', _TF1_ONLY_STRING)
register_model('nasnet', 'cifar10', _TF1_ONLY_STRING)
register_model('ssd300', 'coco', _TF1_ONLY_STRING)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Import official resnet models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
import datasets
from models import model as model_lib
class ImagenetResnetModel(model_lib.CNNModel):
"""Official resnet models."""
def __init__(self, resnet_size, version=2, params=None):
"""These are the parameters that work for Imagenet data.
Args:
resnet_size: The number of convolutional layers needed in the model.
version: 1 or 2 for v1 or v2, respectively.
params: params passed by BenchmarkCNN.
"""
default_batch_sizes = {
50: 128,
101: 32,
152: 32
}
batch_size = default_batch_sizes.get(resnet_size, 32)
default_learning_rate = 0.0125 * batch_size / 32
model_name = 'official_resnet_{}_v{}'.format(resnet_size, version)
super(ImagenetResnetModel, self).__init__(
model_name, 224, batch_size, default_learning_rate, params=params)
self.resnet_size = resnet_size
self.version = version
def get_learning_rate(self, global_step, batch_size):
num_batches_per_epoch = (
float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
values = [1, 0.1, 0.01, 0.001, 0.0001]
adjusted_learning_rate = (
self.learning_rate / self.default_batch_size * batch_size)
values = [v * adjusted_learning_rate for v in values]
return tf.train.piecewise_constant(global_step, boundaries, values)
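# For illustration: with batch_size=256 and IMAGENET_NUM_TRAIN_IMAGES=1281167
# there are about 5005 batches per epoch, so the rate decays tenfold at steps
# 150136, 300273, 400364 and 450410 (epochs 30, 60, 80, 90), starting from
# 0.05 / 128 * 256 = 0.1 given the resnet50 default batch size of 128.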
def build_network(self, images, phase_train=True, nclass=1001,
data_type=tf.float32):
# pylint: disable=g-import-not-at-top
try:
from official.resnet.r1.imagenet_main import ImagenetModel
except ImportError:
tf.logging.fatal('Please add tensorflow/models to the PYTHONPATH.')
raise
images = tf.cast(images, data_type)
model_class = ImagenetModel(resnet_size=self.resnet_size,
resnet_version=self.version,
# The official model dtype seems to be ignored,
# as the dtype it uses is the dtype of the input
# images. Doesn't hurt to set it though.
dtype=data_type)
logits = model_class(images, phase_train)
logits = tf.cast(logits, tf.float32)
return model_lib.BuildNetworkResult(logits=logits, extra_info=None)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Overfeat model configuration.
References:
OverFeat: Integrated Recognition, Localization and Detection using
Convolutional Networks
Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus,
Yann LeCun, 2014
http://arxiv.org/abs/1312.6229
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from models import model
class OverfeatModel(model.CNNModel):
"""OverfeatModel."""
def __init__(self, params=None):
super(OverfeatModel, self).__init__(
'overfeat', 231, 32, 0.005, params=params)
def add_inference(self, cnn):
# Note: VALID requires padding the images by 3 in width and height
cnn.conv(96, 11, 11, 4, 4, mode='VALID')
cnn.mpool(2, 2)
cnn.conv(256, 5, 5, 1, 1, mode='VALID')
cnn.mpool(2, 2)
cnn.conv(512, 3, 3)
cnn.conv(1024, 3, 3)
cnn.conv(1024, 3, 3)
cnn.mpool(2, 2)
cnn.reshape([-1, 1024 * 6 * 6])
cnn.affine(3072)
cnn.dropout()
cnn.affine(4096)
cnn.dropout()
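# --- Illustrative sketch (not part of the original file) ---
# Traces the spatial size through the layers above to show why the reshape
# uses 1024 * 6 * 6 for a 231x231 input.
def _overfeat_spatial_size(size=231):
  size = (size - 11) // 4 + 1  # conv 11x11, stride 4, VALID -> 56
  size //= 2                   # mpool 2x2 -> 28
  size = size - 5 + 1          # conv 5x5, stride 1, VALID -> 24
  size //= 2                   # mpool 2x2 -> 12
  # The three 3x3 SAME convolutions keep the spatial size at 12.
  size //= 2                   # mpool 2x2 -> 6
  return size                  # 6, giving 1024 * 6 * 6 features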
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnet model configuration.
References:
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition
arXiv:1512.03385 (2015)
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks
arXiv:1603.05027 (2016)
Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy,
Alan L. Yuille
DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
Atrous Convolution, and Fully Connected CRFs
arXiv:1606.00915 (2016)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
import datasets
import mlperf
from models import model as model_lib
def bottleneck_block_v1(cnn, depth, depth_bottleneck, stride):
"""Bottleneck block with identity short-cut for ResNet v1.
Args:
    cnn: the network to append bottleneck blocks to.
depth: the number of output filters for this bottleneck block.
depth_bottleneck: the number of bottleneck filters for this block.
stride: Stride used in the first layer of the bottleneck block.
"""
input_layer = cnn.top_layer
in_size = cnn.top_size
name_key = 'resnet_v1'
name = name_key + str(cnn.counts[name_key])
cnn.counts[name_key] += 1
with tf.variable_scope(name):
if depth == in_size:
if stride == 1:
shortcut = input_layer
else:
shortcut = cnn.apool(
1, 1, stride, stride, input_layer=input_layer,
num_channels_in=in_size)
mlperf.logger.log_projection(input_tensor=input_layer,
output_tensor=shortcut)
else:
shortcut = cnn.conv(
depth, 1, 1, stride, stride, activation=None,
use_batch_norm=True, input_layer=input_layer,
num_channels_in=in_size, bias=None)
cnn.conv(depth_bottleneck, 1, 1, stride, stride,
input_layer=input_layer, num_channels_in=in_size,
use_batch_norm=True, bias=None)
cnn.conv(depth_bottleneck, 3, 3, 1, 1, mode='SAME_RESNET',
use_batch_norm=True, bias=None)
res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
use_batch_norm=True, bias=None)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_SHORTCUT_ADD)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
output = tf.nn.relu(shortcut + res)
cnn.top_layer = output
cnn.top_size = depth
def bottleneck_block_v1_5(cnn, depth, depth_bottleneck, stride):
"""Bottleneck block with identity short-cut for ResNet v1.5.
ResNet v1.5 is the informal name for ResNet v1 where stride 2 is used in the
first 3x3 convolution of each block instead of the first 1x1 convolution.
First seen at https://github.com/facebook/fb.resnet.torch. Used in the paper
"Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour"
(arXiv:1706.02677v2) and by fast.ai to train to accuracy in 45 epochs using
multiple image sizes.
Args:
    cnn: the network to append bottleneck blocks to.
depth: the number of output filters for this bottleneck block.
depth_bottleneck: the number of bottleneck filters for this block.
stride: Stride used in the first layer of the bottleneck block.
"""
input_layer = cnn.top_layer
in_size = cnn.top_size
name_key = 'resnet_v1.5'
name = name_key + str(cnn.counts[name_key])
cnn.counts[name_key] += 1
with tf.variable_scope(name):
if depth == in_size:
if stride == 1:
shortcut = input_layer
else:
shortcut = cnn.apool(
1, 1, stride, stride, input_layer=input_layer,
num_channels_in=in_size)
mlperf.logger.log_projection(input_tensor=input_layer,
output_tensor=shortcut)
else:
shortcut = cnn.conv(
depth, 1, 1, stride, stride, activation=None,
use_batch_norm=True, input_layer=input_layer,
num_channels_in=in_size, bias=None)
mlperf.logger.log_projection(input_tensor=input_layer,
output_tensor=shortcut)
cnn.conv(depth_bottleneck, 1, 1, 1, 1,
input_layer=input_layer, num_channels_in=in_size,
use_batch_norm=True, bias=None)
cnn.conv(depth_bottleneck, 3, 3, stride, stride, mode='SAME_RESNET',
use_batch_norm=True, bias=None)
res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
use_batch_norm=True, bias=None)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_SHORTCUT_ADD)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
output = tf.nn.relu(shortcut + res)
cnn.top_layer = output
cnn.top_size = depth
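# --- Illustrative note (not part of the original file) ---
# Stride placement in a downsampling bottleneck block:
#   v1  : 1x1 conv (stride 2) -> 3x3 conv (stride 1) -> 1x1 conv (stride 1)
#   v1.5: 1x1 conv (stride 1) -> 3x3 conv (stride 2) -> 1x1 conv (stride 1)
# Moving the stride to the 3x3 convolution means every input position
# contributes, whereas a stride-2 1x1 convolution skips three quarters of
# them.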
def bottleneck_block_v2(cnn, depth, depth_bottleneck, stride):
"""Bottleneck block with identity short-cut for ResNet v2.
The main difference from v1 is that a batch norm and relu are done at the
start of the block, instead of the end. This initial batch norm and relu is
collectively called a pre-activation.
Args:
    cnn: the network to append bottleneck blocks to.
depth: the number of output filters for this bottleneck block.
depth_bottleneck: the number of bottleneck filters for this block.
stride: Stride used in the first layer of the bottleneck block.
"""
input_layer = cnn.top_layer
in_size = cnn.top_size
name_key = 'resnet_v2'
name = name_key + str(cnn.counts[name_key])
cnn.counts[name_key] += 1
preact = cnn.batch_norm()
mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
preact = tf.nn.relu(preact)
with tf.variable_scope(name):
if depth == in_size:
if stride == 1:
shortcut = input_layer
else:
shortcut = cnn.apool(
1, 1, stride, stride, input_layer=input_layer,
num_channels_in=in_size)
mlperf.logger.log_projection(input_tensor=input_layer,
output_tensor=shortcut)
else:
shortcut = cnn.conv(
depth, 1, 1, stride, stride, activation=None, use_batch_norm=False,
input_layer=preact, num_channels_in=in_size, bias=None)
cnn.conv(depth_bottleneck, 1, 1, stride, stride,
input_layer=preact, num_channels_in=in_size,
use_batch_norm=True, bias=None)
cnn.conv(depth_bottleneck, 3, 3, 1, 1, mode='SAME_RESNET',
use_batch_norm=True, bias=None)
res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
use_batch_norm=False, bias=None)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_SHORTCUT_ADD)
output = shortcut + res
cnn.top_layer = output
cnn.top_size = depth
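# --- Illustrative note (not part of the original file) ---
# Per-block layer order in v2: batch norm -> relu (the pre-activation) ->
# convolutions, and the residual add is not followed by a relu. Compare
# bottleneck_block_v1 above, which ends with relu(shortcut + res).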
def bottleneck_block(cnn, depth, depth_bottleneck, stride, version):
"""Bottleneck block with identity short-cut.
Args:
    cnn: the network to append bottleneck blocks to.
depth: the number of output filters for this bottleneck block.
depth_bottleneck: the number of bottleneck filters for this block.
stride: Stride used in the first layer of the bottleneck block.
version: version of ResNet to build.
"""
mlperf.logger.log(key=mlperf.tags.MODEL_HP_BLOCK_TYPE,
value=mlperf.tags.BOTTLENECK_BLOCK)
mlperf.logger.log_begin_block(
input_tensor=cnn.top_layer, block_type=mlperf.tags.BOTTLENECK_BLOCK)
if version == 'v2':
bottleneck_block_v2(cnn, depth, depth_bottleneck, stride)
elif version == 'v1.5':
bottleneck_block_v1_5(cnn, depth, depth_bottleneck, stride)
else:
bottleneck_block_v1(cnn, depth, depth_bottleneck, stride)
mlperf.logger.log_end_block(output_tensor=cnn.top_layer)
def residual_block(cnn, depth, stride, version, projection_shortcut=False):
"""Residual block with identity short-cut.
Args:
    cnn: the network to append residual blocks to.
depth: the number of output filters for this residual block.
stride: Stride used in the first layer of the residual block.
version: version of ResNet to build.
    projection_shortcut: whether to use a projection shortcut even if the
      top size and depth are equal.
"""
  pre_activation = (version == 'v2')
input_layer = cnn.top_layer
in_size = cnn.top_size
if projection_shortcut:
shortcut = cnn.conv(
depth, 1, 1, stride, stride, activation=None,
use_batch_norm=True, input_layer=input_layer,
num_channels_in=in_size, bias=None)
elif in_size != depth:
    # Plan A shortcut: average-pool, then zero-pad the channel dimension.
shortcut = cnn.apool(1, 1, stride, stride,
input_layer=input_layer,
num_channels_in=in_size)
padding = (depth - in_size) // 2
if cnn.channel_pos == 'channels_last':
shortcut = tf.pad(
shortcut, [[0, 0], [0, 0], [0, 0], [padding, padding]])
else:
shortcut = tf.pad(
shortcut, [[0, 0], [padding, padding], [0, 0], [0, 0]])
else:
shortcut = input_layer
if pre_activation:
res = cnn.batch_norm(input_layer)
res = tf.nn.relu(res)
else:
res = input_layer
cnn.conv(depth, 3, 3, stride, stride,
input_layer=res, num_channels_in=in_size,
use_batch_norm=True, bias=None)
if pre_activation:
res = cnn.conv(depth, 3, 3, 1, 1, activation=None,
use_batch_norm=False, bias=None)
output = shortcut + res
else:
res = cnn.conv(depth, 3, 3, 1, 1, activation=None,
use_batch_norm=True, bias=None)
output = tf.nn.relu(shortcut + res)
cnn.top_layer = output
cnn.top_size = depth
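# --- Illustrative sketch (not part of the original file) ---
# Demonstrates the "plan A" shortcut above with channels_last data: a 1x1
# average pool with stride 2 halves the spatial size, then zeros pad the
# channel dimension from in_size to depth (here 16 -> 32).
#
#   x = tf.zeros([8, 32, 32, 16])
#   short = tf.nn.avg_pool(x, ksize=[1, 1, 1, 1],
#                          strides=[1, 2, 2, 1], padding='VALID')
#   short = tf.pad(short, [[0, 0], [0, 0], [0, 0], [8, 8]])
#   # short.shape == [8, 16, 16, 32]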
class ResnetModel(model_lib.CNNModel):
"""Resnet cnn network configuration."""
def __init__(self, model, layer_counts, params=None):
default_batch_sizes = {
'resnet50': 64,
'resnet101': 32,
'resnet152': 32,
'resnet50_v1.5': 64,
'resnet101_v1.5': 32,
'resnet152_v1.5': 32,
'resnet50_v2': 64,
'resnet101_v2': 32,
'resnet152_v2': 32,
}
batch_size = default_batch_sizes.get(model, 32)
    # The ResNet paper uses a starting lr of .1 at bs=256; the default base
    # lr here is 0.128 and is rescaled relative to base_lr_batch_size in
    # get_scaled_base_learning_rate.
    self.base_lr_batch_size = 256
    base_lr = 0.128
if params and params.resnet_base_lr:
base_lr = params.resnet_base_lr
super(ResnetModel, self).__init__(model, 224, batch_size, base_lr,
layer_counts, params=params)
if 'v2' in model:
self.version = 'v2'
elif 'v1.5' in model:
self.version = 'v1.5'
else:
self.version = 'v1'
def add_inference(self, cnn):
if self.layer_counts is None:
raise ValueError('Layer counts not specified for %s' % self.get_model())
# Drop batch size from shape logging.
mlperf.logger.log(key=mlperf.tags.MODEL_HP_INITIAL_SHAPE,
value=cnn.top_layer.shape.as_list()[1:])
cnn.use_batch_norm = True
cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
cnn.conv(64, 7, 7, 2, 2, mode='SAME_RESNET', use_batch_norm=True)
cnn.mpool(3, 3, 2, 2, mode='SAME')
for _ in xrange(self.layer_counts[0]):
bottleneck_block(cnn, 256, 64, 1, self.version)
for i in xrange(self.layer_counts[1]):
stride = 2 if i == 0 else 1
bottleneck_block(cnn, 512, 128, stride, self.version)
for i in xrange(self.layer_counts[2]):
stride = 2 if i == 0 else 1
bottleneck_block(cnn, 1024, 256, stride, self.version)
for i in xrange(self.layer_counts[3]):
stride = 2 if i == 0 else 1
bottleneck_block(cnn, 2048, 512, stride, self.version)
if self.version == 'v2':
cnn.batch_norm()
cnn.top_layer = tf.nn.relu(cnn.top_layer)
cnn.spatial_mean()
def get_learning_rate(self, global_step, batch_size):
rescaled_lr = self.get_scaled_base_learning_rate(batch_size)
num_batches_per_epoch = (
datasets.IMAGENET_NUM_TRAIN_IMAGES / batch_size)
boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
values = [1, 0.1, 0.01, 0.001, 0.0001]
values = [rescaled_lr * v for v in values]
lr = tf.train.piecewise_constant(global_step, boundaries, values)
warmup_steps = int(num_batches_per_epoch * 5)
mlperf.logger.log(key=mlperf.tags.OPT_LR_WARMUP_STEPS, value=warmup_steps)
warmup_lr = (
rescaled_lr * tf.cast(global_step, tf.float32) / tf.cast(
warmup_steps, tf.float32))
return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
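    # --- Illustrative note (not part of the original file) ---
    # The schedule above warms up linearly to rescaled_lr over the first 5
    # epochs, then decays piecewise: 1x until epoch 30, then 0.1x, 0.01x,
    # 0.001x and 0.0001x from epochs 30, 60, 80 and 90 respectively.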
def get_scaled_base_learning_rate(self, batch_size):
"""Calculates base learning rate for creating lr schedule.
In replicated mode, gradients are summed rather than averaged which, with
the sgd and momentum optimizers, increases the effective learning rate by
lr * num_gpus. Dividing the base lr by num_gpus negates the increase.
Args:
batch_size: Total batch-size.
Returns:
Base learning rate to use to create lr schedule.
"""
base_lr = self.learning_rate
if self.params.variable_update == 'replicated':
base_lr = self.learning_rate / self.params.num_gpus
scaled_lr = base_lr * (batch_size / self.base_lr_batch_size)
return scaled_lr
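# --- Illustrative sketch (not part of the original file) ---
# Pure-python rendering of ResnetModel's warmup-then-piecewise schedule,
# assuming datasets.IMAGENET_NUM_TRAIN_IMAGES == 1281167 and
# parameter_server mode (no division by num_gpus). In replicated mode the
# base lr is divided by num_gpus first, e.g. 8 GPUs at total batch 256 * 8:
# (0.128 / 8) * (2048 / 256) = 0.128.
def _example_resnet_lr(step, base_lr=0.128, batch_size=256):
  num_batches_per_epoch = 1281167.0 / batch_size
  rescaled_lr = base_lr * (batch_size / 256.0)  # base_lr_batch_size == 256
  warmup_steps = int(num_batches_per_epoch * 5)
  if step < warmup_steps:
    return rescaled_lr * step / warmup_steps  # linear warmup over 5 epochs
  boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
  values = [rescaled_lr * v for v in [1, 0.1, 0.01, 0.001, 0.0001]]
  for boundary, value in zip(boundaries, values):
    if step < boundary:
      return value
  return values[-1]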
def create_resnet50_model(params):
return ResnetModel('resnet50', (3, 4, 6, 3), params=params)
def create_resnet50_v1_5_model(params):
return ResnetModel('resnet50_v1.5', (3, 4, 6, 3), params=params)
def create_resnet50_v2_model(params):
return ResnetModel('resnet50_v2', (3, 4, 6, 3), params=params)
def create_resnet101_model(params):
return ResnetModel('resnet101', (3, 4, 23, 3), params=params)
def create_resnet101_v2_model(params):
return ResnetModel('resnet101_v2', (3, 4, 23, 3), params=params)
def create_resnet152_model(params):
return ResnetModel('resnet152', (3, 8, 36, 3), params=params)
def create_resnet152_v2_model(params):
return ResnetModel('resnet152_v2', (3, 8, 36, 3), params=params)
class ResnetCifar10Model(model_lib.CNNModel):
"""Resnet cnn network configuration for Cifar 10 dataset.
V1 model architecture follows the one defined in the paper:
https://arxiv.org/pdf/1512.03385.pdf.
V2 model architecture follows the one defined in the paper:
https://arxiv.org/pdf/1603.05027.pdf.
"""
def __init__(self, model, layer_counts, params=None):
if 'v2' in model:
self.version = 'v2'
else:
self.version = 'v1'
super(ResnetCifar10Model, self).__init__(
model, 32, 128, 0.1, layer_counts, params=params)
def add_inference(self, cnn):
if self.layer_counts is None:
raise ValueError('Layer counts not specified for %s' % self.get_model())
cnn.use_batch_norm = True
cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
if self.version == 'v2':
cnn.conv(16, 3, 3, 1, 1, use_batch_norm=True)
else:
cnn.conv(16, 3, 3, 1, 1, activation=None, use_batch_norm=True)
for i in xrange(self.layer_counts[0]):
      # Output shape: batch_size x 16 x 32 x 32.
residual_block(cnn, 16, 1, self.version)
for i in xrange(self.layer_counts[1]):
# Subsampling is performed at the first convolution with a stride of 2
stride = 2 if i == 0 else 1
      # Output shape: batch_size x 32 x 16 x 16.
residual_block(cnn, 32, stride, self.version)
for i in xrange(self.layer_counts[2]):
stride = 2 if i == 0 else 1
      # Output shape: batch_size x 64 x 8 x 8.
residual_block(cnn, 64, stride, self.version)
if self.version == 'v2':
cnn.batch_norm()
cnn.top_layer = tf.nn.relu(cnn.top_layer)
cnn.spatial_mean()
def get_learning_rate(self, global_step, batch_size):
    # 50,000 is the number of CIFAR-10 training images.
    num_batches_per_epoch = int(50000 / batch_size)
    boundaries = (num_batches_per_epoch *
                  np.array([82, 123, 300], dtype=np.int64)).tolist()
values = [0.1, 0.01, 0.001, 0.0002]
return tf.train.piecewise_constant(global_step, boundaries, values)
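# --- Illustrative worked example (not part of the original file) ---
# For batch_size = 128: num_batches_per_epoch = int(50000 / 128) = 390, so
# the lr drops from 0.1 to 0.01 at step 390 * 82 (epoch 82), to 0.001 at
# epoch 123 and to 0.0002 at epoch 300.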
def create_resnet20_cifar_model(params):
return ResnetCifar10Model('resnet20', (3, 3, 3), params=params)
def create_resnet20_v2_cifar_model(params):
return ResnetCifar10Model('resnet20_v2', (3, 3, 3), params=params)
def create_resnet32_cifar_model(params):
return ResnetCifar10Model('resnet32', (5, 5, 5), params=params)
def create_resnet32_v2_cifar_model(params):
return ResnetCifar10Model('resnet32_v2', (5, 5, 5), params=params)
def create_resnet44_cifar_model(params):
return ResnetCifar10Model('resnet44', (7, 7, 7), params=params)
def create_resnet44_v2_cifar_model(params):
return ResnetCifar10Model('resnet44_v2', (7, 7, 7), params=params)
def create_resnet56_cifar_model(params):
return ResnetCifar10Model('resnet56', (9, 9, 9), params=params)
def create_resnet56_v2_cifar_model(params):
return ResnetCifar10Model('resnet56_v2', (9, 9, 9), params=params)
def create_resnet110_cifar_model(params):
return ResnetCifar10Model('resnet110', (18, 18, 18), params=params)
def create_resnet110_v2_cifar_model(params):
return ResnetCifar10Model('resnet110_v2', (18, 18, 18), params=params)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for resnet_model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import mock
import tensorflow.compat.v1 as tf
from models import resnet_model
class ResNetModelTest(tf.test.TestCase):
def testGetScaledBaseLearningRateOneGpuLrFromParams(self):
"""Verifies setting params.resnet_base_lr pipes through."""
lr = self._get_scaled_base_learning_rate(1,
'parameter_server',
256,
base_lr=.050)
self.assertEqual(lr, .050)
def testGetScaledBaseLearningRateOneGpu(self):
lr = self._get_scaled_base_learning_rate(1, 'parameter_server', 128)
self.assertEqual(lr, .064)
def testGetScaledBaseLearningRateEightGpuReplicated(self):
lr = self._get_scaled_base_learning_rate(8, 'replicated', 256 * 8)
self.assertEqual(lr, .128)
def testGetScaledBaseLearningRateTwoGpuParameter(self):
lr = self._get_scaled_base_learning_rate(2, 'parameter_server', 256 * 2)
self.assertEqual(lr, .256)
def testGetScaledBaseLearningRateTwoGpuUneven(self):
lr = self._get_scaled_base_learning_rate(2, 'replicated', 13)
self.assertEqual(lr, 0.0032500000000000003)
def _get_scaled_base_learning_rate(self,
num_gpus,
variable_update,
batch_size,
base_lr=None):
"""Simplifies testing different learning rate calculations.
Args:
num_gpus: Number of GPUs to be used.
variable_update: Type of variable update used.
batch_size: Total batch size.
base_lr: Base learning rate before scaling.
Returns:
Base learning rate that would be used to create lr schedule.
"""
params = mock.Mock()
params.num_gpus = num_gpus
params.variable_update = variable_update
if base_lr:
params.resnet_base_lr = base_lr
resnet50_model = resnet_model.ResnetModel('resnet50', 50, params=params)
return resnet50_model.get_scaled_base_learning_rate(batch_size)
if __name__ == '__main__':
tf.disable_v2_behavior()
tf.test.main()
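# --- Illustrative note (not part of the original file) ---
# Assuming this test file sits next to the benchmark sources and `mock` is
# installed, it can be run directly (the exact filename is not shown here):
#   python resnet_model_test.py  # hypothetical filename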