Commit 6b6f8b0c authored by huchen

del tensorflow benchmark cls

parent 4749cd5e
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Densenet model configuration.
References:
"Densely Connected Convolutional Networks": https://arxiv.org/pdf/1608.06993
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
from models import model as model_lib
class DensenetCifar10Model(model_lib.CNNModel):
"""Densenet cnn network configuration."""
def __init__(self, model, layer_counts, growth_rate, params=None):
self.growth_rate = growth_rate
super(DensenetCifar10Model, self).__init__(
model, 32, 64, 0.1, layer_counts=layer_counts, params=params)
self.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
def dense_block(self, cnn, growth_rate):
input_layer = cnn.top_layer
c = cnn.batch_norm(input_layer, **self.batch_norm_config)
c = tf.nn.relu(c)
c = cnn.conv(growth_rate, 3, 3, 1, 1, stddev=np.sqrt(2.0/9/growth_rate),
activation=None, input_layer=c)
channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
cnn.top_layer = tf.concat([input_layer, c], channel_index)
cnn.top_size += growth_rate
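# For illustration: densenet40_k12 enters Block 1 with 16 channels from the
# initial conv, and each dense_block call concatenates growth_rate=12 new
# channels, so after the 12 blocks of Block 1 the layer carries
# 16 + 12 * 12 = 160 channels.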
def transition_layer(self, cnn):
in_size = cnn.top_size
cnn.batch_norm(**self.batch_norm_config)
cnn.top_layer = tf.nn.relu(cnn.top_layer)
cnn.conv(in_size, 1, 1, 1, 1, stddev=np.sqrt(2.0/9/in_size))
cnn.apool(2, 2, 2, 2)
def add_inference(self, cnn):
if self.layer_counts is None:
raise ValueError('Layer counts not specified for %s' % self.get_model_name())
if self.growth_rate is None:
raise ValueError('Growth rate not specified for %s' % self.get_model_name())
cnn.conv(16, 3, 3, 1, 1, activation=None)
# Block 1
for _ in xrange(self.layer_counts[0]):
self.dense_block(cnn, self.growth_rate)
self.transition_layer(cnn)
# Block 2
for _ in xrange(self.layer_counts[1]):
self.dense_block(cnn, self.growth_rate)
self.transition_layer(cnn)
# Block 3
for _ in xrange(self.layer_counts[2]):
self.dense_block(cnn, self.growth_rate)
cnn.batch_norm(**self.batch_norm_config)
cnn.top_layer = tf.nn.relu(cnn.top_layer)
channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
cnn.top_size = cnn.top_layer.get_shape().as_list()[channel_index]
cnn.spatial_mean()
def get_learning_rate(self, global_step, batch_size):
num_batches_per_epoch = 50000 // batch_size
boundaries = num_batches_per_epoch * np.array([150, 225, 300],
dtype=np.int64)
boundaries = list(boundaries)
values = [0.1, 0.01, 0.001, 0.0001]
return tf.train.piecewise_constant(global_step, boundaries, values)
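# For illustration (assuming the default batch size of 64): 50000 // 64 = 781
# batches per epoch, so the learning rate steps down from 0.1 to 0.01, 0.001
# and 0.0001 at global steps 117150, 175725 and 234300 (epochs 150, 225, 300).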
def create_densenet40_k12_model():
return DensenetCifar10Model('densenet40_k12', (12, 12, 12), 12)
def create_densenet100_k12_model():
return DensenetCifar10Model('densenet100_k12', (32, 32, 32), 12)
def create_densenet100_k24_model():
return DensenetCifar10Model('densenet100_k24', (32, 32, 32), 24)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DeepSpeech2 model configuration.
References:
https://arxiv.org/abs/1512.02595
Deep Speech 2: End-to-End Speech Recognition in English and Mandarin
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import itertools
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
import constants
from cnn_util import log_fn
from models import model as model_lib
from tensorflow.python.ops import variables # pylint: disable=g-direct-tensorflow-import
class DeepSpeechDecoder(object):
"""Greedy decoder implementation for Deep Speech model."""
def __init__(self, labels, blank_index=28):
"""Decoder initialization.
Arguments:
labels: a string specifying the speech labels for the decoder to use.
blank_index: an integer specifying index for the blank character. Defaults
to 28.
"""
self.labels = labels
self.blank_index = blank_index
self.int_to_char = dict(enumerate(labels))
def convert_to_string(self, sequence):
"""Convert a sequence of indexes into corresponding string."""
return ''.join([self.int_to_char[i] for i in sequence])
def wer(self, decode, target):
"""Computes the Word Error Rate (WER).
WER is defined as the edit distance between the two provided sentences after
tokenizing to words.
Args:
decode: string of the decoded output.
target: a string for the ground truth label.
Returns:
A float number for the WER of the current decode-target pair.
"""
try:
from nltk.metrics import distance # pylint: disable=g-import-not-at-top
except ImportError as e:
if 'nltk.metrics' not in str(e):  # e.message is Python 2 only.
raise
raise ImportError('To use the experimental deepspeech model, you must '
'pip install -U nltk')
# Map each word to a new char.
words = set(decode.split() + target.split())
word2char = dict(zip(words, range(len(words))))
new_decode = [chr(word2char[w]) for w in decode.split()]
new_target = [chr(word2char[w]) for w in target.split()]
return distance.edit_distance(''.join(new_decode), ''.join(new_target))
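# Example with hypothetical inputs: wer('hi world', 'hi there') maps the three
# distinct words to single characters and returns an edit distance of 1;
# postprocess() later normalizes this by the number of words in the target.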
def cer(self, decode, target):
"""Computes the Character Error Rate (CER).
CER is defined as the edit distance between the two given strings.
Args:
decode: a string of the decoded output.
target: a string for the ground truth label.
Returns:
A float number denoting the CER for the current sentence pair.
"""
try:
from nltk.metrics import distance # pylint: disable=g-import-not-at-top
except ImportError as e:
if 'nltk.metrics' not in str(e):  # e.message is Python 2 only.
raise
raise ImportError('To use the experimental deepspeech model, you must '
'pip install -U nltk')
return distance.edit_distance(decode, target)
def decode(self, char_indexes):
"""Decode the best guess from logits using greedy algorithm."""
# Merge repeated chars.
merge = [k for k, _ in itertools.groupby(char_indexes)]
# Remove the blank index in the decoded sequence.
merge_remove_blank = []
for k in merge:
if k != self.blank_index:
merge_remove_blank.append(k)
return self.convert_to_string(merge_remove_blank)
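# Example: with the speech labels " abcdefghijklmnopqrstuvwxyz'-" used in
# postprocess() and blank_index=28, decode([8, 8, 28, 5, 5]) merges the
# repeats to [8, 28, 5], drops the blank and returns 'he'.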
def decode_logits(self, logits):
"""Decode the best guess from logits using greedy algorithm."""
# Choose the class with maximum probability.
best = list(np.argmax(logits, axis=1))
return self.decode(best)
class DeepSpeech2Model(model_lib.Model):
"""Define DeepSpeech2 model."""
# Supported rnn cells.
SUPPORTED_RNNS = {
'lstm': tf.nn.rnn_cell.BasicLSTMCell,
'rnn': tf.nn.rnn_cell.BasicRNNCell,  # RNNCell itself is abstract.
'gru': tf.nn.rnn_cell.GRUCell,
}
# Parameters for batch normalization.
BATCH_NORM_EPSILON = 1e-5
BATCH_NORM_DECAY = 0.997
# Filters of convolution layer
CONV_FILTERS = 32
def __init__(self,
num_rnn_layers=5,
rnn_type='lstm',
is_bidirectional=True,
rnn_hidden_size=800,
use_bias=True,
params=None):
"""Initialize DeepSpeech2 model.
Args:
num_rnn_layers: an integer, the number of rnn layers (default: 5).
rnn_type: a string, one of the supported rnn cells: gru, rnn or lstm.
is_bidirectional: a boolean to indicate if the rnn layer is bidirectional.
rnn_hidden_size: an integer for the number of hidden units in the RNN
cell.
use_bias: a boolean specifying whether to use a bias in the last fc layer.
params: the params from BenchmarkCNN.
"""
super(DeepSpeech2Model, self).__init__(
'deepspeech2',
batch_size=128,
learning_rate=0.0005,
fp16_loss_scale=128,
params=params)
self.num_rnn_layers = num_rnn_layers
self.rnn_type = rnn_type
self.is_bidirectional = is_bidirectional
self.rnn_hidden_size = rnn_hidden_size
self.use_bias = use_bias
self.num_feature_bins = 161
self.max_time_steps = 3494
self.max_label_length = 576
def _batch_norm(self, inputs, training):
"""Batch normalization layer.
Note that the choice of momentum will affect validation accuracy over time.
Batch norm has different behaviors during training/evaluation. With a large
momentum, the model takes longer to get a near-accurate estimation of the
moving mean/variance over the entire training dataset, which means we need
more iterations to see good evaluation results. If the training data is
evenly distributed over the feature space, we can also try setting a smaller
momentum (such as 0.1) to get good evaluation result sooner.
Args:
inputs: input data for batch norm layer.
training: a boolean to indicate if it is in training stage.
Returns:
tensor output from batch norm layer.
"""
return tf.layers.batch_normalization(
inputs=inputs,
momentum=DeepSpeech2Model.BATCH_NORM_DECAY,
epsilon=DeepSpeech2Model.BATCH_NORM_EPSILON,
fused=True,
training=training)
def _conv_bn_layer(self, inputs, padding, filters, kernel_size, strides,
layer_id, training):
"""Defines 2D convolutional + batch normalization layer.
Args:
inputs: input data for convolution layer.
padding: padding to be applied before convolution layer.
filters: an integer, number of output filters in the convolution.
kernel_size: a tuple specifying the height and width of the 2D convolution
window.
strides: a tuple specifying the stride length of the convolution.
layer_id: an integer specifying the layer index.
training: a boolean to indicate which stage we are in (training/eval).
Returns:
tensor output from the current layer.
"""
# Perform symmetric padding on the feature dimension of time_step
# This step is required to avoid issues when RNN output sequence is shorter
# than the label length.
inputs = tf.pad(
inputs,
[[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]])
inputs = tf.layers.conv2d(
inputs=inputs,
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding='valid',
use_bias=False,
activation=tf.nn.relu6,
name='cnn_{}'.format(layer_id))
return self._batch_norm(inputs, training)
def _rnn_layer(self, inputs, rnn_cell, rnn_hidden_size, layer_id,
use_batch_norm, is_bidirectional, training):
"""Defines a batch normalization + rnn layer.
Args:
inputs: input tensors for the current layer.
rnn_cell: RNN cell instance to use.
rnn_hidden_size: an integer for the dimensionality of the rnn output
space.
layer_id: an integer for the index of current layer.
use_batch_norm: a boolean specifying whether to perform batch
normalization on input states.
is_bidirectional: a boolean specifying whether the rnn layer is
bi-directional.
training: a boolean to indicate which stage we are in (training/eval).
Returns:
tensor output for the current layer.
"""
if use_batch_norm:
inputs = self._batch_norm(inputs, training)
# Construct forward/backward RNN cells.
fw_cell = rnn_cell(
num_units=rnn_hidden_size, name='rnn_fw_{}'.format(layer_id))
if is_bidirectional:
bw_cell = rnn_cell(
num_units=rnn_hidden_size, name='rnn_bw_{}'.format(layer_id))
outputs, _ = tf.nn.bidirectional_dynamic_rnn(
cell_fw=fw_cell,
cell_bw=bw_cell,
inputs=inputs,
dtype=tf.float32,
swap_memory=True)
rnn_outputs = tf.concat(outputs, -1)
else:
# dynamic_rnn returns an (outputs, state) pair; keep only the outputs.
rnn_outputs, _ = tf.nn.dynamic_rnn(
fw_cell, inputs, dtype=tf.float32, swap_memory=True)
return rnn_outputs
def get_input_data_types(self, subset):
"""Returns the list of data types of the inputs."""
del subset # Same data types for both train and validation subsets.
return [self.data_type, tf.int32, tf.int32, tf.int32]
def get_input_shapes(self, subset):
"""Returns the list of shapes of the padded inputs."""
del subset # Same shapes for both train and validation subsets
return [
[self.batch_size, self.max_time_steps, self.num_feature_bins, 1],
[self.batch_size, self.max_label_length],
[self.batch_size, 1],
[self.batch_size, 1],
]
def get_synthetic_inputs(self, input_name, nclass):
inputs = tf.random_uniform(self.get_input_shapes('train')[0],
dtype=self.get_input_data_types('train')[0])
inputs = variables.VariableV1(inputs, trainable=False,
collections=[tf.GraphKeys.LOCAL_VARIABLES],
name=input_name)
labels = tf.convert_to_tensor(
np.random.randint(28, size=[self.batch_size, self.max_label_length]))
input_lengths = tf.convert_to_tensor(
[self.max_time_steps] * self.batch_size)
label_lengths = tf.convert_to_tensor(
[self.max_label_length] * self.batch_size)
return [inputs, labels, input_lengths, label_lengths]
# TODO(laigd): support fp16.
# TODO(laigd): support multiple gpus.
def build_network(self, inputs, phase_train=True, nclass=29):
"""Builds the forward pass of the deepspeech2 model.
Args:
inputs: The input list of the model.
phase_train: True during training. False during evaluation.
nclass: Number of classes that the input spectrogram can belong to.
Returns:
A BuildNetworkResult which contains the logits and model-specific extra
information.
"""
inputs = inputs[0] # Get the spectrogram feature.
# Two cnn layers.
inputs = self._conv_bn_layer(
inputs,
padding=(20, 5),
filters=DeepSpeech2Model.CONV_FILTERS,
kernel_size=(41, 11),
strides=(2, 2),
layer_id=1,
training=phase_train)
inputs = self._conv_bn_layer(
inputs,
padding=(10, 5),
filters=DeepSpeech2Model.CONV_FILTERS,
kernel_size=(21, 11),
strides=(2, 1),
layer_id=2,
training=phase_train)
# output of conv_layer2 with the shape of
# [batch_size (N), times (T), features (F), channels (C)].
# Convert the conv output to rnn input.
feat_size = inputs.get_shape().as_list()[2]
inputs = tf.reshape(
inputs,
[self.batch_size, -1, feat_size * DeepSpeech2Model.CONV_FILTERS])
# RNN layers.
rnn_cell = DeepSpeech2Model.SUPPORTED_RNNS[self.rnn_type]
for layer_counter in xrange(self.num_rnn_layers):
# No batch normalization on the first layer.
use_batch_norm = (layer_counter != 0)
inputs = self._rnn_layer(inputs, rnn_cell, self.rnn_hidden_size,
layer_counter + 1, use_batch_norm,
self.is_bidirectional, phase_train)
# FC layer with batch norm.
inputs = self._batch_norm(inputs, phase_train)
logits = tf.layers.dense(inputs, nclass, use_bias=self.use_bias)
return model_lib.BuildNetworkResult(logits=logits, extra_info=None)
def loss_function(self, inputs, build_network_result):
"""Computes the ctc loss for the current batch of predictions.
Args:
inputs: the input list of the model.
build_network_result: a BuildNetworkResult returned by build_network().
Returns:
The loss tensor of the model.
"""
logits = build_network_result.logits
actual_time_steps = inputs[2]
probs = tf.nn.softmax(logits)
ctc_time_steps = tf.shape(probs)[1]
ctc_input_length = tf.to_float(
tf.multiply(actual_time_steps, ctc_time_steps))
ctc_input_length = tf.to_int32(
tf.floordiv(ctc_input_length, tf.to_float(self.max_time_steps)))
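# The two lines above rescale each utterance's true length from input frames
# to CTC frames. For illustration: with max_time_steps=3494 and, say,
# ctc_time_steps=874 after the stride-2 convolutions, an utterance of
# actual_time_steps=2000 maps to floor(2000 * 874 / 3494) = 500 CTC frames.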
label_length = inputs[3]
label_length = tf.to_int32(tf.squeeze(label_length))
ctc_input_length = tf.to_int32(tf.squeeze(ctc_input_length))
labels = inputs[1]
sparse_labels = tf.to_int32(
tf.keras.backend.ctc_label_dense_to_sparse(labels, label_length))
y_pred = tf.log(
tf.transpose(probs, perm=[1, 0, 2]) + tf.keras.backend.epsilon())
losses = tf.expand_dims(
tf.nn.ctc_loss(
labels=sparse_labels,
inputs=y_pred,
sequence_length=ctc_input_length,
ignore_longer_outputs_than_inputs=True),
axis=1)
loss = tf.reduce_mean(losses)
return loss
PROBABILITY_TENSOR = 'deepspeech2_prob'
LABEL_TENSOR = 'deepspeech2_label'
def accuracy_function(self, inputs, logits):
"""Returns the ops to evaluate the model performance."""
# Get probabilities of each predicted class
probs = tf.nn.softmax(logits)
assert probs.shape.as_list()[0] == self.batch_size
return {
(constants.UNREDUCED_ACCURACY_OP_PREFIX + self.PROBABILITY_TENSOR):
probs,
(constants.UNREDUCED_ACCURACY_OP_PREFIX + self.LABEL_TENSOR):
inputs[1],
}
def postprocess(self, results):
"""Postprocess results returned from model in Python."""
probs = results[self.PROBABILITY_TENSOR]
total_wer, total_cer = 0, 0
speech_labels = " abcdefghijklmnopqrstuvwxyz'-"
greedy_decoder = DeepSpeechDecoder(speech_labels)
# Evaluate the performance using WER (Word Error Rate) and CER (Character
# Error Rate) as metrics.
targets = results[self.LABEL_TENSOR] # The ground truth transcript
for i in range(self.batch_size):
# Decode string.
predicted_str = greedy_decoder.decode_logits(probs[i])
expected_str = greedy_decoder.decode(targets[i])
# Compute CER.
total_cer += (greedy_decoder.cer(predicted_str, expected_str) /
len(expected_str))
# Compute WER.
total_wer += (greedy_decoder.wer(predicted_str, expected_str) /
len(expected_str.split()))
# Get mean value
total_cer /= self.batch_size
total_wer /= self.batch_size
log_fn('total CER: {:f}; total WER: {:f}; total examples: {:d}.'.format(
total_cer, total_wer, self.batch_size))
# TODO(laigd): get rid of top_N_accuracy bindings in benchmark_cnn.py
return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrap the official recommendation model in a tf_cnn_benchmarks Model.
This allows the recommendation NCF model to be used in tf_cnn_benchmarks.
Currently, the implementation is fairly hacky, because tf_cnn_benchmarks is
intended to be used only with CNNs.
Only synthetic data with 1 GPU is currently supported.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
from models import model
# Obtained by running the official NCF model with the following command:
# python ncf_main.py --dataset ml-20m
# and printing the number of users and items here:
# https://github.com/tensorflow/models/blob/d089975f630a8a01be63e45ef08a31be14bb96b4/official/recommendation/data_preprocessing.py#L68
_NUM_USERS_20M = 138493
_NUM_ITEMS_20M = 26744
# TODO(reedwm): Support multi-GPU. Currently keras layers, which this model
# uses, ignore variable_scopes, which we rely on for multi-GPU support.
# TODO(reedwm): Support real data. This will require a significant refactor.
# TODO(reedwm): All-reduce IndexedSlices more effectively.
# TODO(reedwm): Support the 1M variant of this model.
class NcfModel(model.Model):
r"""A model.Model wrapper around the official NCF recommendation model.
To do an NCF run with synthetic data that roughly matches what the official
model does, run:
python tf_cnn_benchmarks.py --optimizer=adam --model=ncf --batch_size=65536 \
--weight_decay=0 --sparse_to_dense_grads
"""
def __init__(self, params=None):
super(NcfModel, self).__init__(
'official_ncf', batch_size=2048, learning_rate=0.0005,
fp16_loss_scale=128, params=params)
if self.fp16_vars:
raise ValueError('NCF model only supports float32 variables for now.')
def build_network(self, inputs, phase_train=True, nclass=1001):
try:
from official.recommendation import neumf_model # pylint: disable=g-import-not-at-top
except ImportError as e:
if 'neumf_model' not in str(e):  # e.message is Python 2 only.
raise
raise ImportError('To use the experimental NCF model, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models to the PYTHONPATH.')
del nclass
users, items, _ = inputs
params = {
'num_users': _NUM_USERS_20M,
'num_items': _NUM_ITEMS_20M,
'model_layers': (256, 256, 128, 64),
'mf_dim': 64,
'mf_regularization': 0,
'mlp_reg_layers': (0, 0, 0, 0),
'use_tpu': False
}
user_input = tf.keras.layers.Input(tensor=users, name='user_input')
item_input = tf.keras.layers.Input(tensor=items, name='item_input')
if self.data_type == tf.float32:
keras_model = neumf_model.construct_model(user_input, item_input, params)
logits = keras_model.output
else:
assert self.data_type == tf.float16
old_floatx = tf.keras.backend.floatx()
try:
tf.keras.backend.set_floatx('float16')
# We cannot rely on the variable_scope's fp16 custom getter here,
# because the NCF model uses keras layers, which ignore variable scopes.
# So we use a variable_creator_scope instead.
with tf.variable_creator_scope(_fp16_variable_creator):
keras_model = neumf_model.construct_model(user_input, item_input,
params)
logits = tf.cast(keras_model.output, tf.float32)
finally:
tf.keras.backend.set_floatx(old_floatx)
return model.BuildNetworkResult(logits=logits, extra_info=None)
def loss_function(self, inputs, build_network_result):
logits = build_network_result.logits
# Softmax with the first column of ones is equivalent to sigmoid.
# TODO(reedwm): Actually, the first column should be zeros to be equivalent
# to sigmoid. But, we keep it at ones to match the official models.
logits = tf.concat([tf.ones(logits.shape, dtype=logits.dtype), logits],
axis=1)
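# Concretely, softmax([1, x])[1] == sigmoid(x - 1), while a zeros column
# would give softmax([0, x])[1] == sigmoid(x) exactly.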
return tf.losses.sparse_softmax_cross_entropy(
labels=inputs[2],
logits=logits
)
def get_synthetic_inputs(self, input_name, nclass):
"""Returns the ops to generate synthetic inputs and labels."""
def users_init_val():
return tf.random_uniform((self.batch_size, 1), minval=0,
maxval=_NUM_USERS_20M, dtype=tf.int32)
users = tf.Variable(users_init_val, dtype=tf.int32, trainable=False,
collections=[tf.GraphKeys.LOCAL_VARIABLES],
name='synthetic_users')
def items_init_val():
return tf.random_uniform((self.batch_size, 1), minval=0,
maxval=_NUM_ITEMS_20M, dtype=tf.int32)
items = tf.Variable(items_init_val, dtype=tf.int32, trainable=False,
collections=[tf.GraphKeys.LOCAL_VARIABLES],
name='synthetic_items')
def labels_init_val():
return tf.random_uniform((self.batch_size,), minval=0, maxval=2,
dtype=tf.int32)
labels = tf.Variable(labels_init_val, dtype=tf.int32, trainable=False,
collections=[tf.GraphKeys.LOCAL_VARIABLES],
name='synthetic_labels')
return [users, items, labels]
def get_input_shapes(self, subset):
del subset
return [[self.batch_size, 1], [self.batch_size, 1], [self.batch_size]]
def get_input_data_types(self, subset):
del subset
return [tf.int32, tf.int32, tf.int32]
def _fp16_variable_creator(next_creator, **kwargs):
"""Variable creator to create variables in fp32 and cast them to fp16."""
dtype = kwargs.get('dtype', None)
initial_value = kwargs.get('initial_value', None)
if dtype is None:
if initial_value is not None and not callable(initial_value):
dtype = initial_value.dtype
if dtype == tf.float16:
if callable(initial_value):
new_initial_value = lambda: tf.cast(initial_value(), tf.float32)
else:
new_initial_value = tf.cast(initial_value, tf.float32)
kwargs['dtype'] = tf.float32
kwargs['initial_value'] = new_initial_value
var = next_creator(**kwargs)
return tf.cast(var, dtype=tf.float16)
else:
return next_creator(**kwargs)
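# A minimal usage sketch for the creator above (illustrative, not part of the
# benchmark code): inside the scope, a variable requested in fp16 is stored in
# fp32 and an fp16 cast of it is returned, keeping optimizer updates in full
# precision.
#
#   with tf.variable_creator_scope(_fp16_variable_creator):
#     w = tf.Variable(tf.zeros([4], dtype=tf.float16), name='w')
#   # 'w' behaves as an fp16 tensor backed by an fp32 variable.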
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Googlenet model configuration.
References:
Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich
Going deeper with convolutions
arXiv preprint arXiv:1409.4842 (2014)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from models import model
class GooglenetModel(model.CNNModel):
"""GoogLeNet."""
def __init__(self, params=None):
super(GooglenetModel, self).__init__(
'googlenet', 224, 32, 0.005, params=params)
def add_inference(self, cnn):
def inception_v1(cnn, k, l, m, n, p, q):
cols = [[('conv', k, 1, 1)], [('conv', l, 1, 1), ('conv', m, 3, 3)],
[('conv', n, 1, 1), ('conv', p, 5, 5)],
[('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)]]
cnn.inception_module('incept_v1', cols)
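# The column arguments map onto the paper's table: k is the 1x1 path, l and m
# the 3x3 reduce and 3x3 path, n and p the 5x5 reduce and 5x5 path, and q the
# pool projection. For example, inception_v1(cnn, 64, 96, 128, 16, 32, 32)
# reproduces inception (3a) with 64 + 128 + 32 + 32 = 256 output channels.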
cnn.conv(64, 7, 7, 2, 2)
cnn.mpool(3, 3, 2, 2, mode='SAME')
cnn.conv(64, 1, 1)
cnn.conv(192, 3, 3)
cnn.mpool(3, 3, 2, 2, mode='SAME')
inception_v1(cnn, 64, 96, 128, 16, 32, 32)
inception_v1(cnn, 128, 128, 192, 32, 96, 64)
cnn.mpool(3, 3, 2, 2, mode='SAME')
inception_v1(cnn, 192, 96, 208, 16, 48, 64)
inception_v1(cnn, 160, 112, 224, 24, 64, 64)
inception_v1(cnn, 128, 128, 256, 24, 64, 64)
inception_v1(cnn, 112, 144, 288, 32, 64, 64)
inception_v1(cnn, 256, 160, 320, 32, 128, 128)
cnn.mpool(3, 3, 2, 2, mode='SAME')
inception_v1(cnn, 256, 160, 320, 32, 128, 128)
inception_v1(cnn, 384, 192, 384, 48, 128, 128)
cnn.apool(7, 7, 1, 1, mode='VALID')
cnn.reshape([-1, 1024])
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Inception model configuration.
Includes multiple models: inception3, inception4, inception-resnet2.
References:
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
Inception-v4, Inception-ResNet and the Impact of Residual Connections on
Learning
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich
Going Deeper with Convolutions
http://arxiv.org/pdf/1409.4842v1.pdf
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
Zbigniew Wojna
Rethinking the Inception Architecture for Computer Vision
arXiv preprint arXiv:1512.00567 (2015)
Inception v3 model: http://arxiv.org/abs/1512.00567
Inception v4 and Resnet V2 architectures: http://arxiv.org/abs/1602.07261
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange # pylint: disable=redefined-builtin
from models import model
class Inceptionv3Model(model.CNNModel):
"""InceptionV3."""
def __init__(self, auxiliary=False, params=None):
self._auxiliary = auxiliary
super(Inceptionv3Model, self).__init__(
'inception3', 299, 32, 0.005, params=params)
def add_inference(self, cnn):
def inception_v3_a(cnn, n):
cols = [[('conv', 64, 1, 1)], [('conv', 48, 1, 1), ('conv', 64, 5, 5)],
[('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
[('apool', 3, 3, 1, 1, 'SAME'), ('conv', n, 1, 1)]]
cnn.inception_module('incept_v3_a', cols)
def inception_v3_b(cnn):
cols = [[('conv', 384, 3, 3, 2, 2, 'VALID')],
[('conv', 64, 1, 1),
('conv', 96, 3, 3),
('conv', 96, 3, 3, 2, 2, 'VALID')],
[('mpool', 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v3_b', cols)
def inception_v3_c(cnn, n):
cols = [[('conv', 192, 1, 1)],
[('conv', n, 1, 1), ('conv', n, 1, 7), ('conv', 192, 7, 1)],
[('conv', n, 1, 1), ('conv', n, 7, 1), ('conv', n, 1, 7),
('conv', n, 7, 1), ('conv', 192, 1, 7)],
[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)]]
cnn.inception_module('incept_v3_c', cols)
def inception_v3_d(cnn):
cols = [[('conv', 192, 1, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')],
[('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 192, 7, 1),
('conv', 192, 3, 3, 2, 2, 'VALID')],
[('mpool', 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v3_d', cols)
def inception_v3_e(cnn, pooltype):
cols = [[('conv', 320, 1, 1)], [('conv', 384, 1, 1), ('conv', 384, 1, 3)],
[('share',), ('conv', 384, 3, 1)],
[('conv', 448, 1, 1), ('conv', 384, 3, 3), ('conv', 384, 1, 3)],
[('share',), ('share',), ('conv', 384, 3, 1)],
[('mpool' if pooltype == 'max' else 'apool', 3, 3, 1, 1, 'SAME'),
('conv', 192, 1, 1)]]
cnn.inception_module('incept_v3_e', cols)
def incept_v3_aux(cnn):
assert cnn.aux_top_layer is None
cnn.aux_top_layer = cnn.top_layer
cnn.aux_top_size = cnn.top_size
with cnn.switch_to_aux_top_layer():
cnn.apool(5, 5, 3, 3, mode='VALID')
cnn.conv(128, 1, 1, mode='SAME')
cnn.conv(768, 5, 5, mode='VALID', stddev=0.01)
cnn.reshape([-1, 768])
cnn.use_batch_norm = True
cnn.conv(32, 3, 3, 2, 2, mode='VALID') # 299 x 299 x 3
cnn.conv(32, 3, 3, 1, 1, mode='VALID') # 149 x 149 x 32
cnn.conv(64, 3, 3, 1, 1, mode='SAME') # 147 x 147 x 64
cnn.mpool(3, 3, 2, 2, mode='VALID') # 147 x 147 x 64
cnn.conv(80, 1, 1, 1, 1, mode='VALID') # 73 x 73 x 80
cnn.conv(192, 3, 3, 1, 1, mode='VALID') # 71 x 71 x 192
cnn.mpool(3, 3, 2, 2, 'VALID') # 35 x 35 x 192
inception_v3_a(cnn, 32) # 35 x 35 x 256 mixed.
inception_v3_a(cnn, 64) # 35 x 35 x 288 mixed_1.
inception_v3_a(cnn, 64) # 35 x 35 x 288 mixed_2
inception_v3_b(cnn) # 17 x 17 x 768 mixed_3
inception_v3_c(cnn, 128) # 17 x 17 x 768 mixed_4
inception_v3_c(cnn, 160) # 17 x 17 x 768 mixed_5
inception_v3_c(cnn, 160) # 17 x 17 x 768 mixed_6
inception_v3_c(cnn, 192) # 17 x 17 x 768 mixed_7
if self._auxiliary:
incept_v3_aux(cnn) # Auxiliary head logits
inception_v3_d(cnn) # 17 x 17 x 1280 mixed_8
inception_v3_e(cnn, 'avg') # 8 x 8 x 2048 mixed_9
inception_v3_e(cnn, 'max') # 8 x 8 x 2048 mixed_10
cnn.apool(8, 8, 1, 1, 'VALID') # 8 x 8 x 2048
cnn.reshape([-1, 2048]) # 1 x 1 x 2048
# Stem functions
def inception_v4_sa(cnn):
cols = [[('mpool', 3, 3, 2, 2, 'VALID')], [('conv', 96, 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v4_sa', cols)
def inception_v4_sb(cnn):
cols = [[('conv', 64, 1, 1), ('conv', 96, 3, 3, 1, 1, 'VALID')],
[('conv', 64, 1, 1), ('conv', 64, 7, 1), ('conv', 64, 1, 7),
('conv', 96, 3, 3, 1, 1, 'VALID')]]
cnn.inception_module('incept_v4_sb', cols)
def inception_v4_sc(cnn):
cols = [[('conv', 192, 3, 3, 2, 2, 'VALID')],
[('mpool', 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v4_sc', cols)
# Reduction functions
def inception_v4_ra(cnn, k, l, m, n):
cols = [
[('mpool', 3, 3, 2, 2, 'VALID')], [('conv', n, 3, 3, 2, 2, 'VALID')],
[('conv', k, 1, 1), ('conv', l, 3, 3), ('conv', m, 3, 3, 2, 2, 'VALID')]
]
cnn.inception_module('incept_v4_ra', cols)
def inception_v4_rb(cnn):
cols = [[('mpool', 3, 3, 2, 2, 'VALID')],
[('conv', 192, 1, 1), ('conv', 192, 3, 3, 2, 2, 'VALID')],
[('conv', 256, 1, 1), ('conv', 256, 1, 7), ('conv', 320, 7, 1),
('conv', 320, 3, 3, 2, 2, 'VALID')]]
cnn.inception_module('incept_v4_rb', cols)
class Inceptionv4Model(model.CNNModel):
"""Inceptionv4."""
def __init__(self, params=None):
super(Inceptionv4Model, self).__init__(
'inception4', 299, 32, 0.005, params=params)
def add_inference(self, cnn):
def inception_v4_a(cnn):
cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 96, 1, 1)],
[('conv', 96, 1, 1)], [('conv', 64, 1, 1), ('conv', 96, 3, 3)],
[('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)]]
cnn.inception_module('incept_v4_a', cols)
def inception_v4_b(cnn):
cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 128, 1, 1)],
[('conv', 384, 1, 1)],
[('conv', 192, 1, 1), ('conv', 224, 1, 7), ('conv', 256, 7, 1)],
[('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 224, 7, 1),
('conv', 224, 1, 7), ('conv', 256, 7, 1)]]
cnn.inception_module('incept_v4_b', cols)
def inception_v4_c(cnn):
cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 256, 1, 1)],
[('conv', 256, 1, 1)], [('conv', 384, 1, 1), ('conv', 256, 1, 3)],
[('share',), ('conv', 256, 3, 1)],
[('conv', 384, 1, 1), ('conv', 448, 1, 3), ('conv', 512, 3, 1),
('conv', 256, 3, 1)], [('share',), ('share',), ('share',),
('conv', 256, 1, 3)]]
cnn.inception_module('incept_v4_c', cols)
cnn.use_batch_norm = True
cnn.conv(32, 3, 3, 2, 2, mode='VALID')
cnn.conv(32, 3, 3, 1, 1, mode='VALID')
cnn.conv(64, 3, 3)
inception_v4_sa(cnn)
inception_v4_sb(cnn)
inception_v4_sc(cnn)
for _ in xrange(4):
inception_v4_a(cnn)
inception_v4_ra(cnn, 192, 224, 256, 384)
for _ in xrange(7):
inception_v4_b(cnn)
inception_v4_rb(cnn)
for _ in xrange(3):
inception_v4_c(cnn)
cnn.spatial_mean()
cnn.dropout(0.8)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Lenet model configuration.
References:
LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner
Gradient-based learning applied to document recognition
Proceedings of the IEEE (1998)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from models import model
class Lenet5Model(model.CNNModel):
"""Lenet5."""
def __init__(self, params=None):
super(Lenet5Model, self).__init__('lenet5', 28, 32, 0.005, params=params)
def add_inference(self, cnn):
# Note: This matches TF's MNIST tutorial model
cnn.conv(32, 5, 5)
cnn.mpool(2, 2)
cnn.conv(64, 5, 5)
cnn.mpool(2, 2)
cnn.reshape([-1, 64 * 7 * 7])
cnn.affine(512)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base model configuration for CNN benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import namedtuple
import tensorflow.compat.v1 as tf
import convnet_builder
import mlperf
from tensorflow.python.ops import variables as variables_module # pylint: disable=g-direct-tensorflow-import
# BuildNetworkResult encapsulates the result (e.g. logits) of a
# Model.build_network() call.
BuildNetworkResult = namedtuple(
'BuildNetworkResult',
[
'logits', # logits of the network
'extra_info', # Model specific extra information
])
class Model(object):
"""Base model config for DNN benchmarks."""
def __init__(self,
model_name,
batch_size,
learning_rate,
fp16_loss_scale,
params=None):
self.model_name = model_name
self.batch_size = batch_size
self.default_batch_size = batch_size
self.learning_rate = learning_rate
# TODO(reedwm) Set custom loss scales for each model instead of using the
# default of 128.
self.fp16_loss_scale = fp16_loss_scale
# use_tf_layers specifies whether to build the model using tf.layers.
# fp16_vars specifies whether to create the variables in float16.
if params:
self.use_tf_layers = params.use_tf_layers
self.fp16_vars = params.fp16_vars
self.data_type = tf.float16 if params.use_fp16 else tf.float32
else:
self.use_tf_layers = True
self.fp16_vars = False
self.data_type = tf.float32
def get_model_name(self):
return self.model_name
def get_batch_size(self):
return self.batch_size
def set_batch_size(self, batch_size):
self.batch_size = batch_size
def get_default_batch_size(self):
return self.default_batch_size
def get_fp16_loss_scale(self):
return self.fp16_loss_scale
def filter_l2_loss_vars(self, variables):
"""Filters out variables that the L2 loss should not be computed for.
By default, this filters out batch normalization variables and keeps all
other variables. This behavior can be overridden by subclasses.
Args:
variables: A list of the trainable variables.
Returns:
A list of variables that the L2 loss should be computed for.
"""
mlperf.logger.log(key=mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2,
value=True)
return [v for v in variables if 'batchnorm' not in v.name]
def get_learning_rate(self, global_step, batch_size):
del global_step
del batch_size
return self.learning_rate
def get_input_shapes(self, subset):
"""Returns the list of expected shapes of all the inputs to this model."""
del subset
raise NotImplementedError('Must be implemented in derived classes')
def get_input_data_types(self, subset):
"""Returns the list of data types of all the inputs to this model."""
del subset
raise NotImplementedError('Must be implemented in derived classes')
def get_synthetic_inputs(self, input_name, nclass):
"""Returns the ops to generate synthetic inputs."""
raise NotImplementedError('Must be implemented in derived classes')
def build_network(self, inputs, phase_train, nclass):
"""Builds the forward pass of the model.
Args:
inputs: The list of inputs, including labels
phase_train: True during training. False during evaluation.
nclass: Number of classes that the inputs can belong to.
Returns:
A BuildNetworkResult which contains the logits and model-specific extra
information.
"""
raise NotImplementedError('Must be implemented in derived classes')
def loss_function(self, inputs, build_network_result):
"""Returns the op to measure the loss of the model.
Args:
inputs: the input list of the model.
build_network_result: a BuildNetworkResult returned by build_network().
Returns:
The loss tensor of the model.
"""
raise NotImplementedError('Must be implemented in derived classes')
# TODO(laigd): have accuracy_function() take build_network_result instead.
def accuracy_function(self, inputs, logits):
"""Returns the ops to measure the accuracy of the model."""
raise NotImplementedError('Must be implemented in derived classes')
def postprocess(self, results):
"""Postprocess results returned from model in Python."""
return results
def reached_target(self):
"""Define custom methods to stop training when model's target is reached."""
return False
class CNNModel(Model):
"""Base model configuration for CNN benchmarks."""
# TODO(laigd): reduce the number of parameters and read everything from
# params.
def __init__(self,
model,
image_size,
batch_size,
learning_rate,
layer_counts=None,
fp16_loss_scale=128,
params=None):
super(CNNModel, self).__init__(
model, batch_size, learning_rate, fp16_loss_scale,
params=params)
self.image_size = image_size
self.layer_counts = layer_counts
self.depth = 3
self.params = params
self.data_format = params.data_format if params else 'NCHW'
def get_layer_counts(self):
return self.layer_counts
def skip_final_affine_layer(self):
"""Returns if the caller of this class should skip the final affine layer.
Normally, this class adds a final affine layer to the model after calling
self.add_inference(), to generate the logits. If a subclass override this
method to return True, the caller should not add the final affine layer.
This is useful for tests.
"""
return False
def add_backbone_saver(self):
"""Creates a tf.train.Saver as self.backbone_saver for loading backbone.
A tf.train.Saver must be created and saved in self.backbone_saver before
calling load_backbone_model, with a variable name mapping that loads
checkpoint variables correctly into the current model.
"""
raise NotImplementedError(self.get_model_name() + ' does not have backbone model.')
def load_backbone_model(self, sess, backbone_model_path):
"""Loads variable values from a pre-trained backbone model.
This should be used at the beginning of the training process for transfer
learning models using checkpoints of base models.
Args:
sess: session to train the model.
backbone_model_path: path to backbone model checkpoint file.
"""
del sess, backbone_model_path
raise NotImplementedError(self.get_model_name() + ' does not have backbone model.')
def add_inference(self, cnn):
"""Adds the core layers of the CNN's forward pass.
This should build the forward pass layers, except for the initial transpose
of the images and the final Dense layer producing the logits. The layers
should be built with the ConvNetBuilder `cnn`, so that when this function
returns, `cnn.top_layer` and `cnn.top_size` refer to the last layer and the
number of units of the last layer, respectively.
Args:
cnn: A ConvNetBuilder to build the forward pass layers with.
"""
del cnn
raise NotImplementedError('Must be implemented in derived classes')
def get_input_data_types(self, subset):
"""Return data types of inputs for the specified subset."""
del subset # Same types for both 'train' and 'validation' subsets.
return [self.data_type, tf.int32]
def get_input_shapes(self, subset):
"""Return data shapes of inputs for the specified subset."""
del subset # Same shapes for both 'train' and 'validation' subsets.
# Each input is of shape [batch_size, height, width, depth]
# Each label is of shape [batch_size]
return [[self.batch_size, self.image_size, self.image_size, self.depth],
[self.batch_size]]
def get_synthetic_inputs(self, input_name, nclass):
# Synthetic input should be within [0, 255].
image_shape, label_shape = self.get_input_shapes('train')
inputs = tf.truncated_normal(
image_shape,
dtype=self.data_type,
mean=127,
stddev=60,
name=self.model_name + '_synthetic_inputs')
inputs = variables_module.VariableV1(
inputs, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES],
name=input_name)
labels = tf.random_uniform(
label_shape,
minval=0,
maxval=nclass - 1,
dtype=tf.int32,
name=self.model_name + '_synthetic_labels')
return (inputs, labels)
def gpu_preprocess_nhwc(self, images, phase_train=True):
del phase_train
return images
def build_network(self,
inputs,
phase_train=True,
nclass=1001):
"""Returns logits from input images.
Args:
inputs: The input images and labels
phase_train: True during training. False during evaluation.
nclass: Number of classes that the images can belong to.
Returns:
A BuildNetworkResult which contains the logits and model-specific extra
information.
"""
images = inputs[0]
images = self.gpu_preprocess_nhwc(images, phase_train)
if self.data_format == 'NCHW':
images = tf.transpose(images, [0, 3, 1, 2])
var_type = tf.float32
if self.data_type == tf.float16 and self.fp16_vars:
var_type = tf.float16
network = convnet_builder.ConvNetBuilder(
images, self.depth, phase_train, self.use_tf_layers, self.data_format,
self.data_type, var_type)
with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
self.add_inference(network)
# Add the final fully-connected class layer
logits = (
network.affine(nclass, activation='linear')
if not self.skip_final_affine_layer() else network.top_layer)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_FINAL_SHAPE,
value=logits.shape.as_list()[1:])
aux_logits = None
if network.aux_top_layer is not None:
with network.switch_to_aux_top_layer():
aux_logits = network.affine(nclass, activation='linear', stddev=0.001)
if self.data_type == tf.float16:
# TODO(reedwm): Determine if we should do this cast here.
logits = tf.cast(logits, tf.float32)
if aux_logits is not None:
aux_logits = tf.cast(aux_logits, tf.float32)
return BuildNetworkResult(
logits=logits, extra_info=None if aux_logits is None else aux_logits)
def loss_function(self, inputs, build_network_result):
"""Returns the op to measure the loss of the model."""
logits = build_network_result.logits
_, labels = inputs
# TODO(laigd): consider putting the aux logit in the Inception model,
# which could call super.loss_function twice, once with the normal logits
# and once with the aux logits.
aux_logits = build_network_result.extra_info
with tf.name_scope('xentropy'):
mlperf.logger.log(key=mlperf.tags.MODEL_HP_LOSS_FN, value=mlperf.tags.CCE)
cross_entropy = tf.losses.sparse_softmax_cross_entropy(
logits=logits, labels=labels)
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
if aux_logits is not None:
with tf.name_scope('aux_xentropy'):
aux_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
logits=aux_logits, labels=labels)
aux_loss = 0.4 * tf.reduce_mean(aux_cross_entropy, name='aux_loss')
loss = tf.add_n([loss, aux_loss])
return loss
def accuracy_function(self, inputs, logits):
"""Returns the ops to measure the accuracy of the model."""
_, labels = inputs
top_1_op = tf.reduce_sum(
tf.cast(tf.nn.in_top_k(logits, labels, 1), self.data_type))
top_5_op = tf.reduce_sum(
tf.cast(tf.nn.in_top_k(logits, labels, 5), self.data_type))
return {'top_1_accuracy': top_1_op, 'top_5_accuracy': top_5_op}
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Model configurations for CNN benchmarks.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from functools import partial
from models import alexnet_model
from models import densenet_model
from models import googlenet_model
from models import inception_model
from models import lenet_model
from models import official_resnet_model
from models import overfeat_model
from models import resnet_model
from models import trivial_model
from models import vgg_model
from models.experimental import deepspeech
from models.experimental import official_ncf_model
_model_name_to_imagenet_model = {
'vgg11': vgg_model.Vgg11Model,
'vgg16': vgg_model.Vgg16Model,
'vgg19': vgg_model.Vgg19Model,
'lenet': lenet_model.Lenet5Model,
'googlenet': googlenet_model.GooglenetModel,
'overfeat': overfeat_model.OverfeatModel,
'alexnet': alexnet_model.AlexnetModel,
'trivial': trivial_model.TrivialModel,
'inception3': inception_model.Inceptionv3Model,
'inception4': inception_model.Inceptionv4Model,
'official_resnet18_v2':
partial(official_resnet_model.ImagenetResnetModel, 18),
'official_resnet34_v2':
partial(official_resnet_model.ImagenetResnetModel, 34),
'official_resnet50_v2':
partial(official_resnet_model.ImagenetResnetModel, 50),
'official_resnet101_v2':
partial(official_resnet_model.ImagenetResnetModel, 101),
'official_resnet152_v2':
partial(official_resnet_model.ImagenetResnetModel, 152),
'official_resnet200_v2':
partial(official_resnet_model.ImagenetResnetModel, 200),
'official_resnet18':
partial(official_resnet_model.ImagenetResnetModel, 18, version=1),
'official_resnet34':
partial(official_resnet_model.ImagenetResnetModel, 34, version=1),
'official_resnet50':
partial(official_resnet_model.ImagenetResnetModel, 50, version=1),
'official_resnet101':
partial(official_resnet_model.ImagenetResnetModel, 101, version=1),
'official_resnet152':
partial(official_resnet_model.ImagenetResnetModel, 152, version=1),
'official_resnet200':
partial(official_resnet_model.ImagenetResnetModel, 200, version=1),
'resnet50': resnet_model.create_resnet50_model,
'resnet50_v1.5': resnet_model.create_resnet50_v1_5_model,
'resnet50_v2': resnet_model.create_resnet50_v2_model,
'resnet101': resnet_model.create_resnet101_model,
'resnet101_v2': resnet_model.create_resnet101_v2_model,
'resnet152': resnet_model.create_resnet152_model,
'resnet152_v2': resnet_model.create_resnet152_v2_model,
'ncf': official_ncf_model.NcfModel,
}
_model_name_to_cifar_model = {
'alexnet': alexnet_model.AlexnetCifar10Model,
'resnet20': resnet_model.create_resnet20_cifar_model,
'resnet20_v2': resnet_model.create_resnet20_v2_cifar_model,
'resnet32': resnet_model.create_resnet32_cifar_model,
'resnet32_v2': resnet_model.create_resnet32_v2_cifar_model,
'resnet44': resnet_model.create_resnet44_cifar_model,
'resnet44_v2': resnet_model.create_resnet44_v2_cifar_model,
'resnet56': resnet_model.create_resnet56_cifar_model,
'resnet56_v2': resnet_model.create_resnet56_v2_cifar_model,
'resnet110': resnet_model.create_resnet110_cifar_model,
'resnet110_v2': resnet_model.create_resnet110_v2_cifar_model,
'trivial': trivial_model.TrivialCifar10Model,
'densenet40_k12': densenet_model.create_densenet40_k12_model,
'densenet100_k12': densenet_model.create_densenet100_k12_model,
'densenet100_k24': densenet_model.create_densenet100_k24_model,
}
_model_name_to_object_detection_model = {
'trivial': trivial_model.TrivialSSD300Model,
}
def _get_model_map(dataset_name):
"""Get name to model map for specified dataset."""
if dataset_name == 'cifar10':
return _model_name_to_cifar_model
elif dataset_name in ('imagenet', 'synthetic'):
return _model_name_to_imagenet_model
elif dataset_name == 'librispeech':
return {'deepspeech2': deepspeech.DeepSpeech2Model}
elif dataset_name == 'coco':
return _model_name_to_object_detection_model
else:
raise ValueError('Invalid dataset name: %s' % dataset_name)
# A model map dict can have this string as a value when TF2 is used, to indicate
# the model is only available in TF1.
_TF1_ONLY_STRING = 'TF1_ONLY'
def get_model_config(model_name, dataset, params):
"""Map model name to model network configuration."""
model_map = _get_model_map(dataset.name)
if model_name not in model_map:
raise ValueError('Invalid model name \'%s\' for dataset \'%s\'' %
(model_name, dataset.name))
model = model_map[model_name]
if model == _TF1_ONLY_STRING:
raise ValueError('Model \'%s\' can only be used with TensorFlow 1'
% (model_name,))
return model(params=params)
def register_model(model_name, dataset_name, model_func):
"""Register a new model that can be obtained with `get_model_config`."""
model_map = _get_model_map(dataset_name)
if model_name in model_map:
raise ValueError('Model "%s" is already registered for dataset "%s"' %
(model_name, dataset_name))
model_map[model_name] = model_func
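# Example with a hypothetical model class: after
#   register_model('mycnn', 'imagenet', MyCnnModel)
# a later get_model_config('mycnn', imagenet_dataset, params) call returns
# MyCnnModel(params=params).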
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import slim # pylint: disable=unused-import
can_import_contrib = True
except ImportError:
can_import_contrib = False
def register_tf1_models():
"""Registers all the TensorFlow 1-only models.
TF 1-only models use contrib, which was removed in TF 2. If contrib can be
imported, the TF 1-only models are registered normally. If contrib cannot be
imported, the models are registered with the 'TF1_ONLY' string instead, which
will cause an error to be thrown if these models are used.
"""
if can_import_contrib:
from models.tf1_only import mobilenet_v2
from models.tf1_only import nasnet_model
from models.tf1_only import ssd_model
register_model('mobilenet', 'imagenet', mobilenet_v2.MobilenetModel)
register_model('nasnet', 'imagenet', nasnet_model.NasnetModel)
register_model('nasnetlarge', 'imagenet', nasnet_model.NasnetLargeModel)
register_model('nasnet', 'cifar10', nasnet_model.NasnetCifarModel)
register_model('ssd300', 'coco', ssd_model.SSD300Model)
else:
register_model('mobilenet', 'imagenet', _TF1_ONLY_STRING)
register_model('nasnet', 'imagenet', _TF1_ONLY_STRING)
register_model('nasnetlarge', 'imagenet', _TF1_ONLY_STRING)
register_model('nasnet', 'cifar10', _TF1_ONLY_STRING)
register_model('ssd300', 'coco', _TF1_ONLY_STRING)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Import official resnet models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
import datasets
from models import model as model_lib
class ImagenetResnetModel(model_lib.CNNModel):
"""Official resnet models."""
def __init__(self, resnet_size, version=2, params=None):
"""These are the parameters that work for Imagenet data.
Args:
resnet_size: The number of convolutional layers needed in the model.
version: 1 or 2 for v1 or v2, respectively.
params: params passed by BenchmarkCNN.
"""
default_batch_sizes = {
50: 128,
101: 32,
152: 32
}
batch_size = default_batch_sizes.get(resnet_size, 32)
default_learning_rate = 0.0125 * batch_size / 32
model_name = 'official_resnet_{}_v{}'.format(resnet_size, version)
super(ImagenetResnetModel, self).__init__(
model_name, 224, batch_size, default_learning_rate, params=params)
self.resnet_size = resnet_size
self.version = version
def get_learning_rate(self, global_step, batch_size):
num_batches_per_epoch = (
float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
values = [1, 0.1, 0.01, 0.001, 0.0001]
adjusted_learning_rate = (
self.learning_rate / self.default_batch_size * batch_size)
values = [v * adjusted_learning_rate for v in values]
return tf.train.piecewise_constant(global_step, boundaries, values)
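# For illustration: with batch_size=256 and IMAGENET_NUM_TRAIN_IMAGES=1281167
# there are about 5005 batches per epoch, so the rate decays tenfold at steps
# 150136, 300273, 400364 and 450410 (epochs 30, 60, 80, 90), starting from
# 0.05 / 128 * 256 = 0.1 given the resnet50 default batch size of 128.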
def build_network(self, images, phase_train=True, nclass=1001,
data_type=tf.float32):
# pylint: disable=g-import-not-at-top
try:
from official.resnet.r1.imagenet_main import ImagenetModel
except ImportError:
tf.logging.fatal('Please add tensorflow/models to the PYTHONPATH.')
raise
images = tf.cast(images, data_type)
model_class = ImagenetModel(resnet_size=self.resnet_size,
resnet_version=self.version,
# The official model dtype seems to be ignored,
# as the dtype it uses is the dtype of the input
# images. Doesn't hurt to set it though.
dtype=data_type)
logits = model_class(images, phase_train)
logits = tf.cast(logits, tf.float32)
return model_lib.BuildNetworkResult(logits=logits, extra_info=None)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Overfeat model configuration.
References:
OverFeat: Integrated Recognition, Localization and Detection using
Convolutional Networks
Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus,
Yann LeCun, 2014
http://arxiv.org/abs/1312.6229
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from models import model
class OverfeatModel(model.CNNModel):
"""OverfeatModel."""
def __init__(self, params=None):
super(OverfeatModel, self).__init__(
'overfeat', 231, 32, 0.005, params=params)
def add_inference(self, cnn):
# Note: VALID requires padding the images by 3 in width and height
cnn.conv(96, 11, 11, 4, 4, mode='VALID')
cnn.mpool(2, 2)
cnn.conv(256, 5, 5, 1, 1, mode='VALID')
cnn.mpool(2, 2)
cnn.conv(512, 3, 3)
cnn.conv(1024, 3, 3)
cnn.conv(1024, 3, 3)
cnn.mpool(2, 2)
cnn.reshape([-1, 1024 * 6 * 6])
cnn.affine(3072)
cnn.dropout()
cnn.affine(4096)
cnn.dropout()
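# --- Illustrative sketch (not part of the original file) ---
# Traces the spatial size through the layers above to show why the reshape
# uses 1024 * 6 * 6 for a 231x231 input.
def _overfeat_spatial_size(size=231):
  size = (size - 11) // 4 + 1  # conv 11x11, stride 4, VALID -> 56
  size //= 2                   # mpool 2x2 -> 28
  size = size - 5 + 1          # conv 5x5, stride 1, VALID -> 24
  size //= 2                   # mpool 2x2 -> 12
  # The three 3x3 SAME convolutions keep the spatial size at 12.
  size //= 2                   # mpool 2x2 -> 6
  return size                  # 6, giving 1024 * 6 * 6 features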
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnet model configuration.
References:
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition
arXiv:1512.03385 (2015)
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Identity Mappings in Deep Residual Networks
arXiv:1603.05027 (2016)
Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy,
Alan L. Yuille
DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
Atrous Convolution, and Fully Connected CRFs
arXiv:1606.00915 (2016)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
import datasets
import mlperf
from models import model as model_lib
def bottleneck_block_v1(cnn, depth, depth_bottleneck, stride):
"""Bottleneck block with identity short-cut for ResNet v1.
Args:
    cnn: the network to append bottleneck blocks to.
depth: the number of output filters for this bottleneck block.
depth_bottleneck: the number of bottleneck filters for this block.
stride: Stride used in the first layer of the bottleneck block.
"""
input_layer = cnn.top_layer
in_size = cnn.top_size
name_key = 'resnet_v1'
name = name_key + str(cnn.counts[name_key])
cnn.counts[name_key] += 1
with tf.variable_scope(name):
if depth == in_size:
if stride == 1:
shortcut = input_layer
else:
shortcut = cnn.apool(
1, 1, stride, stride, input_layer=input_layer,
num_channels_in=in_size)
mlperf.logger.log_projection(input_tensor=input_layer,
output_tensor=shortcut)
else:
shortcut = cnn.conv(
depth, 1, 1, stride, stride, activation=None,
use_batch_norm=True, input_layer=input_layer,
num_channels_in=in_size, bias=None)
cnn.conv(depth_bottleneck, 1, 1, stride, stride,
input_layer=input_layer, num_channels_in=in_size,
use_batch_norm=True, bias=None)
cnn.conv(depth_bottleneck, 3, 3, 1, 1, mode='SAME_RESNET',
use_batch_norm=True, bias=None)
res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
use_batch_norm=True, bias=None)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_SHORTCUT_ADD)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
output = tf.nn.relu(shortcut + res)
cnn.top_layer = output
cnn.top_size = depth
def bottleneck_block_v1_5(cnn, depth, depth_bottleneck, stride):
"""Bottleneck block with identity short-cut for ResNet v1.5.
ResNet v1.5 is the informal name for ResNet v1 where stride 2 is used in the
first 3x3 convolution of each block instead of the first 1x1 convolution.
First seen at https://github.com/facebook/fb.resnet.torch. Used in the paper
"Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour"
(arXiv:1706.02677v2) and by fast.ai to train to accuracy in 45 epochs using
multiple image sizes.
Args:
    cnn: the network to append bottleneck blocks to.
depth: the number of output filters for this bottleneck block.
depth_bottleneck: the number of bottleneck filters for this block.
stride: Stride used in the first layer of the bottleneck block.
"""
input_layer = cnn.top_layer
in_size = cnn.top_size
name_key = 'resnet_v1.5'
name = name_key + str(cnn.counts[name_key])
cnn.counts[name_key] += 1
with tf.variable_scope(name):
if depth == in_size:
if stride == 1:
shortcut = input_layer
else:
shortcut = cnn.apool(
1, 1, stride, stride, input_layer=input_layer,
num_channels_in=in_size)
mlperf.logger.log_projection(input_tensor=input_layer,
output_tensor=shortcut)
else:
shortcut = cnn.conv(
depth, 1, 1, stride, stride, activation=None,
use_batch_norm=True, input_layer=input_layer,
num_channels_in=in_size, bias=None)
mlperf.logger.log_projection(input_tensor=input_layer,
output_tensor=shortcut)
cnn.conv(depth_bottleneck, 1, 1, 1, 1,
input_layer=input_layer, num_channels_in=in_size,
use_batch_norm=True, bias=None)
cnn.conv(depth_bottleneck, 3, 3, stride, stride, mode='SAME_RESNET',
use_batch_norm=True, bias=None)
res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
use_batch_norm=True, bias=None)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_SHORTCUT_ADD)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
output = tf.nn.relu(shortcut + res)
cnn.top_layer = output
cnn.top_size = depth
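# --- Illustrative note (not part of the original file) ---
# Stride placement in a downsampling bottleneck block:
#   v1  : 1x1 conv (stride 2) -> 3x3 conv (stride 1) -> 1x1 conv (stride 1)
#   v1.5: 1x1 conv (stride 1) -> 3x3 conv (stride 2) -> 1x1 conv (stride 1)
# Moving the stride to the 3x3 convolution means every input position
# contributes, whereas a stride-2 1x1 convolution skips three quarters of
# them.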
def bottleneck_block_v2(cnn, depth, depth_bottleneck, stride):
"""Bottleneck block with identity short-cut for ResNet v2.
The main difference from v1 is that a batch norm and relu are done at the
start of the block, instead of the end. This initial batch norm and relu is
collectively called a pre-activation.
Args:
    cnn: the network to append bottleneck blocks to.
depth: the number of output filters for this bottleneck block.
depth_bottleneck: the number of bottleneck filters for this block.
stride: Stride used in the first layer of the bottleneck block.
"""
input_layer = cnn.top_layer
in_size = cnn.top_size
name_key = 'resnet_v2'
name = name_key + str(cnn.counts[name_key])
cnn.counts[name_key] += 1
preact = cnn.batch_norm()
mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
preact = tf.nn.relu(preact)
with tf.variable_scope(name):
if depth == in_size:
if stride == 1:
shortcut = input_layer
else:
shortcut = cnn.apool(
1, 1, stride, stride, input_layer=input_layer,
num_channels_in=in_size)
mlperf.logger.log_projection(input_tensor=input_layer,
output_tensor=shortcut)
else:
shortcut = cnn.conv(
depth, 1, 1, stride, stride, activation=None, use_batch_norm=False,
input_layer=preact, num_channels_in=in_size, bias=None)
cnn.conv(depth_bottleneck, 1, 1, stride, stride,
input_layer=preact, num_channels_in=in_size,
use_batch_norm=True, bias=None)
cnn.conv(depth_bottleneck, 3, 3, 1, 1, mode='SAME_RESNET',
use_batch_norm=True, bias=None)
res = cnn.conv(depth, 1, 1, 1, 1, activation=None,
use_batch_norm=False, bias=None)
mlperf.logger.log(key=mlperf.tags.MODEL_HP_SHORTCUT_ADD)
output = shortcut + res
cnn.top_layer = output
cnn.top_size = depth
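# --- Illustrative note (not part of the original file) ---
# Per-block layer order in v2: batch norm -> relu (the pre-activation) ->
# convolutions, and the residual add is not followed by a relu. Compare
# bottleneck_block_v1 above, which ends with relu(shortcut + res).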
def bottleneck_block(cnn, depth, depth_bottleneck, stride, version):
"""Bottleneck block with identity short-cut.
Args:
    cnn: the network to append bottleneck blocks to.
depth: the number of output filters for this bottleneck block.
depth_bottleneck: the number of bottleneck filters for this block.
stride: Stride used in the first layer of the bottleneck block.
version: version of ResNet to build.
"""
mlperf.logger.log(key=mlperf.tags.MODEL_HP_BLOCK_TYPE,
value=mlperf.tags.BOTTLENECK_BLOCK)
mlperf.logger.log_begin_block(
input_tensor=cnn.top_layer, block_type=mlperf.tags.BOTTLENECK_BLOCK)
if version == 'v2':
bottleneck_block_v2(cnn, depth, depth_bottleneck, stride)
elif version == 'v1.5':
bottleneck_block_v1_5(cnn, depth, depth_bottleneck, stride)
else:
bottleneck_block_v1(cnn, depth, depth_bottleneck, stride)
mlperf.logger.log_end_block(output_tensor=cnn.top_layer)
def residual_block(cnn, depth, stride, version, projection_shortcut=False):
"""Residual block with identity short-cut.
Args:
    cnn: the network to append residual blocks to.
depth: the number of output filters for this residual block.
stride: Stride used in the first layer of the residual block.
version: version of ResNet to build.
    projection_shortcut: whether to use a projection shortcut even if the
      top size and depth are equal.
"""
  pre_activation = (version == 'v2')
input_layer = cnn.top_layer
in_size = cnn.top_size
if projection_shortcut:
shortcut = cnn.conv(
depth, 1, 1, stride, stride, activation=None,
use_batch_norm=True, input_layer=input_layer,
num_channels_in=in_size, bias=None)
elif in_size != depth:
    # Plan A shortcut: average-pool, then zero-pad the channel dimension.
shortcut = cnn.apool(1, 1, stride, stride,
input_layer=input_layer,
num_channels_in=in_size)
padding = (depth - in_size) // 2
if cnn.channel_pos == 'channels_last':
shortcut = tf.pad(
shortcut, [[0, 0], [0, 0], [0, 0], [padding, padding]])
else:
shortcut = tf.pad(
shortcut, [[0, 0], [padding, padding], [0, 0], [0, 0]])
else:
shortcut = input_layer
if pre_activation:
res = cnn.batch_norm(input_layer)
res = tf.nn.relu(res)
else:
res = input_layer
cnn.conv(depth, 3, 3, stride, stride,
input_layer=res, num_channels_in=in_size,
use_batch_norm=True, bias=None)
if pre_activation:
res = cnn.conv(depth, 3, 3, 1, 1, activation=None,
use_batch_norm=False, bias=None)
output = shortcut + res
else:
res = cnn.conv(depth, 3, 3, 1, 1, activation=None,
use_batch_norm=True, bias=None)
output = tf.nn.relu(shortcut + res)
cnn.top_layer = output
cnn.top_size = depth
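# --- Illustrative sketch (not part of the original file) ---
# Demonstrates the "plan A" shortcut above with channels_last data: a 1x1
# average pool with stride 2 halves the spatial size, then zeros pad the
# channel dimension from in_size to depth (here 16 -> 32).
#
#   x = tf.zeros([8, 32, 32, 16])
#   short = tf.nn.avg_pool(x, ksize=[1, 1, 1, 1],
#                          strides=[1, 2, 2, 1], padding='VALID')
#   short = tf.pad(short, [[0, 0], [0, 0], [0, 0], [8, 8]])
#   # short.shape == [8, 16, 16, 32]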
class ResnetModel(model_lib.CNNModel):
"""Resnet cnn network configuration."""
def __init__(self, model, layer_counts, params=None):
default_batch_sizes = {
'resnet50': 64,
'resnet101': 32,
'resnet152': 32,
'resnet50_v1.5': 64,
'resnet101_v1.5': 32,
'resnet152_v1.5': 32,
'resnet50_v2': 64,
'resnet101_v2': 32,
'resnet152_v2': 32,
}
batch_size = default_batch_sizes.get(model, 32)
    # The ResNet paper uses a starting lr of .1 at bs=256; the default base
    # lr here is 0.128 and is rescaled relative to base_lr_batch_size in
    # get_scaled_base_learning_rate.
    self.base_lr_batch_size = 256
    base_lr = 0.128
if params and params.resnet_base_lr:
base_lr = params.resnet_base_lr
super(ResnetModel, self).__init__(model, 224, batch_size, base_lr,
layer_counts, params=params)
if 'v2' in model:
self.version = 'v2'
elif 'v1.5' in model:
self.version = 'v1.5'
else:
self.version = 'v1'
def add_inference(self, cnn):
if self.layer_counts is None:
raise ValueError('Layer counts not specified for %s' % self.get_model())
# Drop batch size from shape logging.
mlperf.logger.log(key=mlperf.tags.MODEL_HP_INITIAL_SHAPE,
value=cnn.top_layer.shape.as_list()[1:])
cnn.use_batch_norm = True
cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
cnn.conv(64, 7, 7, 2, 2, mode='SAME_RESNET', use_batch_norm=True)
cnn.mpool(3, 3, 2, 2, mode='SAME')
for _ in xrange(self.layer_counts[0]):
bottleneck_block(cnn, 256, 64, 1, self.version)
for i in xrange(self.layer_counts[1]):
stride = 2 if i == 0 else 1
bottleneck_block(cnn, 512, 128, stride, self.version)
for i in xrange(self.layer_counts[2]):
stride = 2 if i == 0 else 1
bottleneck_block(cnn, 1024, 256, stride, self.version)
for i in xrange(self.layer_counts[3]):
stride = 2 if i == 0 else 1
bottleneck_block(cnn, 2048, 512, stride, self.version)
if self.version == 'v2':
cnn.batch_norm()
cnn.top_layer = tf.nn.relu(cnn.top_layer)
cnn.spatial_mean()
def get_learning_rate(self, global_step, batch_size):
rescaled_lr = self.get_scaled_base_learning_rate(batch_size)
num_batches_per_epoch = (
datasets.IMAGENET_NUM_TRAIN_IMAGES / batch_size)
boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
values = [1, 0.1, 0.01, 0.001, 0.0001]
values = [rescaled_lr * v for v in values]
lr = tf.train.piecewise_constant(global_step, boundaries, values)
warmup_steps = int(num_batches_per_epoch * 5)
mlperf.logger.log(key=mlperf.tags.OPT_LR_WARMUP_STEPS, value=warmup_steps)
warmup_lr = (
rescaled_lr * tf.cast(global_step, tf.float32) / tf.cast(
warmup_steps, tf.float32))
return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
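    # --- Illustrative note (not part of the original file) ---
    # The schedule above warms up linearly to rescaled_lr over the first 5
    # epochs, then decays piecewise: 1x until epoch 30, then 0.1x, 0.01x,
    # 0.001x and 0.0001x from epochs 30, 60, 80 and 90 respectively.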
def get_scaled_base_learning_rate(self, batch_size):
"""Calculates base learning rate for creating lr schedule.
In replicated mode, gradients are summed rather than averaged which, with
the sgd and momentum optimizers, increases the effective learning rate by
lr * num_gpus. Dividing the base lr by num_gpus negates the increase.
Args:
batch_size: Total batch-size.
Returns:
Base learning rate to use to create lr schedule.
"""
base_lr = self.learning_rate
if self.params.variable_update == 'replicated':
base_lr = self.learning_rate / self.params.num_gpus
scaled_lr = base_lr * (batch_size / self.base_lr_batch_size)
return scaled_lr
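# --- Illustrative sketch (not part of the original file) ---
# Pure-python rendering of ResnetModel's warmup-then-piecewise schedule,
# assuming datasets.IMAGENET_NUM_TRAIN_IMAGES == 1281167 and
# parameter_server mode (no division by num_gpus). In replicated mode the
# base lr is divided by num_gpus first, e.g. 8 GPUs at total batch 256 * 8:
# (0.128 / 8) * (2048 / 256) = 0.128.
def _example_resnet_lr(step, base_lr=0.128, batch_size=256):
  num_batches_per_epoch = 1281167.0 / batch_size
  rescaled_lr = base_lr * (batch_size / 256.0)  # base_lr_batch_size == 256
  warmup_steps = int(num_batches_per_epoch * 5)
  if step < warmup_steps:
    return rescaled_lr * step / warmup_steps  # linear warmup over 5 epochs
  boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
  values = [rescaled_lr * v for v in [1, 0.1, 0.01, 0.001, 0.0001]]
  for boundary, value in zip(boundaries, values):
    if step < boundary:
      return value
  return values[-1]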
def create_resnet50_model(params):
return ResnetModel('resnet50', (3, 4, 6, 3), params=params)
def create_resnet50_v1_5_model(params):
return ResnetModel('resnet50_v1.5', (3, 4, 6, 3), params=params)
def create_resnet50_v2_model(params):
return ResnetModel('resnet50_v2', (3, 4, 6, 3), params=params)
def create_resnet101_model(params):
return ResnetModel('resnet101', (3, 4, 23, 3), params=params)
def create_resnet101_v2_model(params):
return ResnetModel('resnet101_v2', (3, 4, 23, 3), params=params)
def create_resnet152_model(params):
return ResnetModel('resnet152', (3, 8, 36, 3), params=params)
def create_resnet152_v2_model(params):
return ResnetModel('resnet152_v2', (3, 8, 36, 3), params=params)
class ResnetCifar10Model(model_lib.CNNModel):
"""Resnet cnn network configuration for Cifar 10 dataset.
V1 model architecture follows the one defined in the paper:
https://arxiv.org/pdf/1512.03385.pdf.
V2 model architecture follows the one defined in the paper:
https://arxiv.org/pdf/1603.05027.pdf.
"""
def __init__(self, model, layer_counts, params=None):
if 'v2' in model:
self.version = 'v2'
else:
self.version = 'v1'
super(ResnetCifar10Model, self).__init__(
model, 32, 128, 0.1, layer_counts, params=params)
def add_inference(self, cnn):
if self.layer_counts is None:
raise ValueError('Layer counts not specified for %s' % self.get_model())
cnn.use_batch_norm = True
cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
if self.version == 'v2':
cnn.conv(16, 3, 3, 1, 1, use_batch_norm=True)
else:
cnn.conv(16, 3, 3, 1, 1, activation=None, use_batch_norm=True)
for i in xrange(self.layer_counts[0]):
      # Output shape: batch_size x 16 x 32 x 32.
residual_block(cnn, 16, 1, self.version)
for i in xrange(self.layer_counts[1]):
# Subsampling is performed at the first convolution with a stride of 2
stride = 2 if i == 0 else 1
      # Output shape: batch_size x 32 x 16 x 16.
residual_block(cnn, 32, stride, self.version)
for i in xrange(self.layer_counts[2]):
stride = 2 if i == 0 else 1
      # Output shape: batch_size x 64 x 8 x 8.
residual_block(cnn, 64, stride, self.version)
if self.version == 'v2':
cnn.batch_norm()
cnn.top_layer = tf.nn.relu(cnn.top_layer)
cnn.spatial_mean()
def get_learning_rate(self, global_step, batch_size):
    # 50,000 is the number of CIFAR-10 training images.
    num_batches_per_epoch = int(50000 / batch_size)
    boundaries = (num_batches_per_epoch *
                  np.array([82, 123, 300], dtype=np.int64)).tolist()
values = [0.1, 0.01, 0.001, 0.0002]
return tf.train.piecewise_constant(global_step, boundaries, values)
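# --- Illustrative worked example (not part of the original file) ---
# For batch_size = 128: num_batches_per_epoch = int(50000 / 128) = 390, so
# the lr drops from 0.1 to 0.01 at step 390 * 82 (epoch 82), to 0.001 at
# epoch 123 and to 0.0002 at epoch 300.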
def create_resnet20_cifar_model(params):
return ResnetCifar10Model('resnet20', (3, 3, 3), params=params)
def create_resnet20_v2_cifar_model(params):
return ResnetCifar10Model('resnet20_v2', (3, 3, 3), params=params)
def create_resnet32_cifar_model(params):
return ResnetCifar10Model('resnet32', (5, 5, 5), params=params)
def create_resnet32_v2_cifar_model(params):
return ResnetCifar10Model('resnet32_v2', (5, 5, 5), params=params)
def create_resnet44_cifar_model(params):
return ResnetCifar10Model('resnet44', (7, 7, 7), params=params)
def create_resnet44_v2_cifar_model(params):
return ResnetCifar10Model('resnet44_v2', (7, 7, 7), params=params)
def create_resnet56_cifar_model(params):
return ResnetCifar10Model('resnet56', (9, 9, 9), params=params)
def create_resnet56_v2_cifar_model(params):
return ResnetCifar10Model('resnet56_v2', (9, 9, 9), params=params)
def create_resnet110_cifar_model(params):
return ResnetCifar10Model('resnet110', (18, 18, 18), params=params)
def create_resnet110_v2_cifar_model(params):
return ResnetCifar10Model('resnet110_v2', (18, 18, 18), params=params)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for resnet_model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import mock
import tensorflow.compat.v1 as tf
from models import resnet_model
class ResNetModelTest(tf.test.TestCase):
def testGetScaledBaseLearningRateOneGpuLrFromParams(self):
"""Verifies setting params.resnet_base_lr pipes through."""
lr = self._get_scaled_base_learning_rate(1,
'parameter_server',
256,
base_lr=.050)
self.assertEqual(lr, .050)
def testGetScaledBaseLearningRateOneGpu(self):
lr = self._get_scaled_base_learning_rate(1, 'parameter_server', 128)
self.assertEqual(lr, .064)
def testGetScaledBaseLearningRateEightGpuReplicated(self):
lr = self._get_scaled_base_learning_rate(8, 'replicated', 256 * 8)
self.assertEqual(lr, .128)
def testGetScaledBaseLearningRateTwoGpuParameter(self):
lr = self._get_scaled_base_learning_rate(2, 'parameter_server', 256 * 2)
self.assertEqual(lr, .256)
def testGetScaledBaseLearningRateTwoGpuUneven(self):
lr = self._get_scaled_base_learning_rate(2, 'replicated', 13)
self.assertEqual(lr, 0.0032500000000000003)
def _get_scaled_base_learning_rate(self,
num_gpus,
variable_update,
batch_size,
base_lr=None):
"""Simplifies testing different learning rate calculations.
Args:
num_gpus: Number of GPUs to be used.
variable_update: Type of variable update used.
batch_size: Total batch size.
base_lr: Base learning rate before scaling.
Returns:
Base learning rate that would be used to create lr schedule.
"""
params = mock.Mock()
params.num_gpus = num_gpus
params.variable_update = variable_update
if base_lr:
params.resnet_base_lr = base_lr
resnet50_model = resnet_model.ResnetModel('resnet50', 50, params=params)
return resnet50_model.get_scaled_base_learning_rate(batch_size)
if __name__ == '__main__':
tf.disable_v2_behavior()
tf.test.main()
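# --- Illustrative note (not part of the original file) ---
# Assuming this test file sits next to the benchmark sources and `mock` is
# installed, it can be run directly (the exact filename is not shown here):
#   python resnet_model_test.py  # hypothetical filename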