Commit 6b6f8b0c authored by huchen

del tensorflow benchmark cls

parent 4749cd5e
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark dataset utilities.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from abc import abstractmethod
import os
import numpy as np
import six
from six.moves import cPickle
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
from tensorflow.python.platform import gfile
import preprocessing
IMAGENET_NUM_TRAIN_IMAGES = 1281167
IMAGENET_NUM_VAL_IMAGES = 50000
COCO_NUM_TRAIN_IMAGES = 118287
COCO_NUM_VAL_IMAGES = 4952
class Dataset(object):
"""Abstract class for cnn benchmarks dataset."""
def __init__(self,
name,
data_dir=None,
queue_runner_required=False,
num_classes=None):
self.name = name
self.data_dir = data_dir
self._queue_runner_required = queue_runner_required
self._num_classes = num_classes
def tf_record_pattern(self, subset):
return os.path.join(self.data_dir, '%s-*-of-*' % subset)
def reader(self):
return tf.TFRecordReader()
@property
def num_classes(self):
return self._num_classes
@num_classes.setter
def num_classes(self, val):
self._num_classes = val
@abstractmethod
def num_examples_per_epoch(self, subset):
pass
def __str__(self):
return self.name
def get_input_preprocessor(self, input_preprocessor='default'):
assert not self.use_synthetic_gpu_inputs()
return _SUPPORTED_INPUT_PREPROCESSORS[self.name][input_preprocessor]
def queue_runner_required(self):
return self._queue_runner_required
def use_synthetic_gpu_inputs(self):
return not self.data_dir
class LibrispeechDataset(Dataset):
"""Configuration for LibriSpeech dataset."""
def __init__(self, data_dir=None):
super(LibrispeechDataset, self).__init__(
'librispeech', data_dir, num_classes=29)
def tf_record_pattern(self, subset):
if subset == 'train':
return os.path.join(self.data_dir, 'train-clean-*.tfrecords')
elif subset == 'validation':
return os.path.join(self.data_dir, 'test-clean.tfrecords')
else:
return ''
def num_examples_per_epoch(self, subset='train'):
del subset
return 2 # TODO(laigd): currently this is an arbitrary number.
class ImageDataset(Dataset):
"""Abstract class for image datasets."""
def __init__(self,
name,
height,
width,
depth=None,
data_dir=None,
queue_runner_required=False,
num_classes=1001):
super(ImageDataset, self).__init__(name, data_dir, queue_runner_required,
num_classes)
self.height = height
self.width = width
self.depth = depth or 3
class ImagenetDataset(ImageDataset):
"""Configuration for Imagenet dataset."""
def __init__(self, data_dir=None):
super(ImagenetDataset, self).__init__(
'imagenet', 300, 300, data_dir=data_dir)
def num_examples_per_epoch(self, subset='train'):
if subset == 'train':
return IMAGENET_NUM_TRAIN_IMAGES
elif subset == 'validation':
return IMAGENET_NUM_VAL_IMAGES
else:
raise ValueError('Invalid data subset "%s"' % subset)
class Cifar10Dataset(ImageDataset):
"""Configuration for cifar 10 dataset.
It will mount all the input images to memory.
"""
def __init__(self, data_dir=None):
super(Cifar10Dataset, self).__init__(
'cifar10',
32,
32,
data_dir=data_dir,
queue_runner_required=True,
num_classes=11)
def read_data_files(self, subset='train'):
"""Reads from data file and returns images and labels in a numpy array."""
assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
'data')
if subset == 'train':
filenames = [
os.path.join(self.data_dir, 'data_batch_%d' % i)
for i in xrange(1, 6)
]
elif subset == 'validation':
filenames = [os.path.join(self.data_dir, 'test_batch')]
else:
raise ValueError('Invalid data subset "%s"' % subset)
inputs = []
for filename in filenames:
with gfile.Open(filename, 'rb') as f:
# python2 does not have the encoding parameter
encoding = {} if six.PY2 else {'encoding': 'bytes'}
inputs.append(cPickle.load(f, **encoding))
# See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
# input format.
all_images = np.concatenate(
[each_input[b'data'] for each_input in inputs]).astype(np.float32)
all_labels = np.concatenate(
[each_input[b'labels'] for each_input in inputs])
return all_images, all_labels
def num_examples_per_epoch(self, subset='train'):
if subset == 'train':
return 50000
elif subset == 'validation':
return 10000
else:
raise ValueError('Invalid data subset "%s"' % subset)
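# Note on the CIFAR-10 file layout read above (a sketch of the expected
# contents, per the CIFAR page cited in read_data_files): each unpickled batch
# is a dict whose b'data' entry is a uint8 array of shape (10000, 3072)
# (row-major RGB planes of 32x32 images) and whose b'labels' entry is a list of
# 10000 integers in [0, 9], so read_data_files('train') returns arrays of shape
# (50000, 3072) and (50000,).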
class COCODataset(ImageDataset):
"""COnfiguration for COCO dataset."""
def __init__(self, data_dir=None, image_size=300):
super(COCODataset, self).__init__(
'coco', image_size, image_size, data_dir=data_dir, num_classes=81)
def num_examples_per_epoch(self, subset='train'):
if subset == 'train':
return COCO_NUM_TRAIN_IMAGES
elif subset == 'validation':
return COCO_NUM_VAL_IMAGES
else:
raise ValueError('Invalid data subset "%s"' % subset)
_SUPPORTED_DATASETS = {
'imagenet': ImagenetDataset,
'cifar10': Cifar10Dataset,
'librispeech': LibrispeechDataset,
'coco': COCODataset,
}
_SUPPORTED_INPUT_PREPROCESSORS = {
'imagenet': {
'default': preprocessing.RecordInputImagePreprocessor,
'official_models_imagenet': preprocessing.ImagenetPreprocessor,
},
'cifar10': {
'default': preprocessing.Cifar10ImagePreprocessor
},
'librispeech': {
'default': preprocessing.LibrispeechPreprocessor
},
'coco': {
'default': preprocessing.COCOPreprocessor
},
}
def create_dataset(data_dir, data_name):
"""Create a Dataset instance based on data_dir and data_name."""
if not data_dir and not data_name:
# When using synthetic data, use synthetic imagenet images by default.
data_name = 'imagenet'
# Infer dataset name from data_dir if data_name is not provided.
if data_name is None:
for supported_name in _SUPPORTED_DATASETS:
if supported_name in data_dir:
data_name = supported_name
break
else: # Failed to identify dataset name from data dir.
raise ValueError('Could not identify name of dataset. '
'Please specify with --data_name option.')
if data_name not in _SUPPORTED_DATASETS:
raise ValueError('Unknown dataset. Must be one of %s' % ', '.join(
[key for key in sorted(_SUPPORTED_DATASETS.keys())]))
return _SUPPORTED_DATASETS[data_name](data_dir)
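# Illustrative usage sketch (not part of the original module): the dataset name
# can be passed explicitly or inferred from the directory name. The paths below
# are hypothetical.
#
#   dataset = create_dataset('/data/imagenet-2012-tfrecord', None)
#   assert isinstance(dataset, ImagenetDataset)
#   dataset.num_examples_per_epoch('train')     # 1281167
#   dataset.num_classes                         # 1001
#
#   cifar = create_dataset('/data/cifar10', 'cifar10')
#   cifar.num_examples_per_epoch('validation')  # 10000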
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions to define flags and params.
Calling a DEFINE_* function will add a ParamSpec namedtuple to the param_specs
dict. The DEFINE_* arguments match those in absl. Calling define_flags() creates
a command-line flag for every ParamSpec defined by a DEFINE_* function.
The reason we don't use absl flags directly is that we want to be able to use
tf_cnn_benchmarks as a library. When using it as a library, we don't want to
define any flags, but instead pass parameters to the BenchmarkCNN constructor.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import namedtuple
from absl import flags as absl_flags
import six
FLAGS = absl_flags.FLAGS
# ParamSpec describes one of benchmark_cnn.BenchmarkCNN's parameters.
ParamSpec = namedtuple('_ParamSpec',
['flag_type', 'default_value', 'description',
'kwargs'])
# Maps from parameter name to its ParamSpec.
param_specs = {}
def DEFINE_string(name, default, help): # pylint: disable=invalid-name,redefined-builtin
param_specs[name] = ParamSpec('string', default, help, {})
def DEFINE_boolean(name, default, help): # pylint: disable=invalid-name,redefined-builtin
param_specs[name] = ParamSpec('boolean', default, help, {})
def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None): # pylint: disable=invalid-name,redefined-builtin
kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
param_specs[name] = ParamSpec('integer', default, help, kwargs)
def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None): # pylint: disable=invalid-name,redefined-builtin
kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
param_specs[name] = ParamSpec('float', default, help, kwargs)
def DEFINE_enum(name, default, enum_values, help): # pylint: disable=invalid-name,redefined-builtin
kwargs = {'enum_values': enum_values}
param_specs[name] = ParamSpec('enum', default, help, kwargs)
def DEFINE_list(name, default, help): # pylint: disable=invalid-name,redefined-builtin
param_specs[name] = ParamSpec('list', default, help, {})
def define_flags(specs=None):
"""Define a command line flag for each ParamSpec in flags.param_specs."""
specs = specs or param_specs
define_flag = {
'boolean': absl_flags.DEFINE_boolean,
'float': absl_flags.DEFINE_float,
'integer': absl_flags.DEFINE_integer,
'string': absl_flags.DEFINE_string,
'enum': absl_flags.DEFINE_enum,
'list': absl_flags.DEFINE_list
}
for name, param_spec in six.iteritems(specs):
if param_spec.flag_type not in define_flag:
raise ValueError('Unknown flag_type %s' % param_spec.flag_type)
else:
define_flag[param_spec.flag_type](name, param_spec.default_value,
help=param_spec.description,
**param_spec.kwargs)
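# Illustrative usage sketch (not part of the original module): parameters are
# declared once with the DEFINE_* helpers and only become real absl flags when
# define_flags() is called by a command-line entry point. The flag name below
# is hypothetical.
#
#   DEFINE_integer('example_num_gpus', 1, 'How many GPUs to use.',
#                  lower_bound=1)
#   define_flags()                     # registers every ParamSpec as a flag
#   FLAGS(['program', '--example_num_gpus=4'])
#   print(FLAGS.example_num_gpus)      # 4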
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark various leading indicators CNNs.
The purpose of these tests is to test each model as a high level baseline and
to ensure the various variable_update options have not regressing. Not all
options are tested. The tests focus on the most viable options.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ctypes
import logging
import os
import sys
from absl import flags
from absl.testing import absltest # pylint: disable=unused-import
import tensorflow.compat.v1 as tf # pylint: disable=g-bad-import-order
import benchmark_cnn
from platforms import util as platforms_util
flags.DEFINE_integer('num_batches', None,
'number of batches to run, excluding warmup')
class BenchmarkBase(tf.test.Benchmark):
"""Base class for all benchmarks in this file."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""Base class for all benchmarks in this file.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
# Load default values if the benchmark is not run with absl.app.run()
if not flags.FLAGS.is_parsed():
flags.FLAGS.mark_as_parsed()
self.fake_data_dir = os.path.join(platforms_util.get_test_data_dir(),
'fake_tf_record_data')
self.output_dir = output_dir
if root_data_dir is None:
self.data_dir = ('/readahead/200M/placer/prod/home/distbelief/'
'imagenet-tensorflow/imagenet-2012-tfrecord')
else:
self.data_dir = os.path.join(root_data_dir, 'imagenet')
def _run_benchmark(self, params):
"""Run a CNN benchmark and report its results.
Args:
params: Params tuple, typically created by benchmark_cnn.make_params or
benchmark_cnn.make_params_from_flags.
"""
logging.info('Running benchmark [%s]', self._get_name())
params = benchmark_cnn.setup(params)
bench = benchmark_cnn.BenchmarkCNN(params)
bench.print_info()
stats = bench.run()
extras = {}
extras['examples_per_sec'] = stats.get('images_per_sec')
if 'last_average_loss' in stats:
extras['last_average_loss'] = stats['last_average_loss']
if 'top_1_accuracy' in stats:
extras['top_1_accuracy'] = stats['top_1_accuracy']
if 'top_5_accuracy' in stats:
extras['top_5_accuracy'] = stats['top_5_accuracy']
self.report_benchmark(
iters=stats.get('num_steps'),
wall_time=stats.get('average_wall_time'),
extras=extras)
def _shared_params(self):
"""Returns shared parameters for all benchmarks in this file."""
params = {}
if flags.FLAGS.num_batches is not None:
params['num_batches'] = flags.FLAGS.num_batches
if self.output_dir is not None:
params['benchmark_log_dir'] = self.output_dir
return benchmark_cnn.make_params(**params)
def _binary_search_batch_size(self, params, init_batch_size):
"""Find the max batch_size using binary search."""
assert init_batch_size > 0
low_batch_size = 0
high_batch_size = None
batch_size = init_batch_size
# No need to run a warmup or many batches; if it doesn't OOM after 10
# batches, it should work in general.
params = params._replace(num_batches=10, num_warmup_batches=0)
# Find high_batch_size first.
tf.logging.info(
'Looking for upper bound to batch size, starting with %d' % batch_size)
while high_batch_size is None:
tf.logging.info('Trying batch_size %d' % batch_size)
params = params._replace(batch_size=batch_size)
bench = benchmark_cnn.BenchmarkCNN(params)
bench.print_info()
try:
bench.run()
low_batch_size = batch_size
batch_size *= 2
except tf.errors.ResourceExhaustedError:
high_batch_size = batch_size - 1
# Binary Search
tf.logging.info(
'Max batch size is in range (%d, %d]. Starting binary search to find '
'exact max batch size.' % (low_batch_size, batch_size))
while low_batch_size < high_batch_size:
batch_size = (low_batch_size + high_batch_size + 1) // 2
tf.logging.info('Trying batch_size %d' % batch_size)
params = params._replace(batch_size=batch_size)
bench = benchmark_cnn.BenchmarkCNN(params)
bench.print_info()
try:
bench.run()
low_batch_size = batch_size
except tf.errors.ResourceExhaustedError:
high_batch_size = batch_size - 1
self.report_benchmark(extras={'max_batch_size': low_batch_size})
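# Illustrative sketch (not part of the original file): the benchmarks below all
# follow the same pattern of taking the shared params namedtuple, swapping in
# test-specific fields with _replace, and handing the result to _run_benchmark.
# Field values here are examples only.
#
#   params = self._shared_params()._replace(num_gpus=1, batch_size=64,
#                                           use_fp16=True)
#   self._run_benchmark(params)
#
# _binary_search_batch_size doubles the batch size until the first
# ResourceExhaustedError (e.g. success at 512, OOM at 1024 gives the interval
# (512, 1023]) and then bisects that interval to report max_batch_size.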
class Resnet50BenchmarksInferenceCpu(BenchmarkBase):
""""Benchmarks for ResNet50 inference on CPU."""
def _shared_params(self):
"""Returns shared parameters for all ResNet50 benchmarks."""
return BenchmarkBase._shared_params(self)._replace(
num_gpus=1,
model='resnet50',
num_warmup_batches=5,
num_batches=50,
distortions=False,
forward_only=True,
device='cpu',
data_format='NHWC',
num_intra_threads=0)
def benchmark_synth_forward_batch1(self):
"""Tests 1 CPU batch size 1."""
params = self._shared_params()._replace(batch_size=1)
self._run_benchmark(params)
def benchmark_synth_forward_batch16(self):
"""Tests 1 CPU batch size 16."""
params = self._shared_params()._replace(batch_size=16)
self._run_benchmark(params)
class FrozenResnet50BenchmarksInferenceCpu(Resnet50BenchmarksInferenceCpu):
""""Benchmarks for ResNet50 frozen graph inference on CPU."""
def _shared_params(self):
return super(FrozenResnet50BenchmarksInferenceCpu,
self)._shared_params()._replace(freeze_when_forward_only=True)
class Resnet50BenchmarksInference(BenchmarkBase):
""""Benchmarks for ResNet50 inference."""
def _shared_params(self):
"""Returns shared parameters for all ResNet50 benchmarks."""
return BenchmarkBase._shared_params(self)._replace(
num_gpus=1, model='resnet50', distortions=False, forward_only=True)
def benchmark_synth_forward_batch128(self):
"""Tests 1 GPU batch size 128."""
params = self._shared_params()._replace(batch_size=128)
self._run_benchmark(params)
def benchmark_fp16_synth_forward_batch128(self):
"""Tests 1 GPU batch size 128 FP16."""
params = self._shared_params()._replace(batch_size=128, use_fp16=True)
self._run_benchmark(params)
def benchmark_fp16_synth_forward_batch16(self):
"""Tests 1 GPU batch size 16 FP16."""
params = self._shared_params()._replace(batch_size=16, use_fp16=True)
self._run_benchmark(params)
def benchmark_xla_synth_forward_batch128(self):
"""Tests 1 GPU batch size 128 with XLA."""
params = self._shared_params()._replace(batch_size=128, xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_synth_forward_batch128(self):
"""Tests 1 GPU batch size 128 FP16 with XLA."""
params = self._shared_params()._replace(
batch_size=128, use_fp16=True, xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_synth_forward_batch16(self):
"""Tests 1 GPU batch size 16 FP16 with XLA."""
params = self._shared_params()._replace(
batch_size=16, use_fp16=True, xla=True)
self._run_benchmark(params)
class FrozenResnet50BenchmarksInference(Resnet50BenchmarksInference):
""""Benchmarks for ResNet50 frozen graph inference."""
def _shared_params(self):
return super(FrozenResnet50BenchmarksInference,
self)._shared_params()._replace(freeze_when_forward_only=True)
def benchmark_trt_synth_forward_batch128(self):
"""Tests 1 GPU batch size 128."""
params = self._shared_params()._replace(batch_size=128, trt_mode='FP32')
self._run_benchmark(params)
# TODO(laigd): enable fp16 tests for TF-TRT, it's currently not supported yet.
# def benchmark_fp16_trt_synth_forward_batch128(self):
# """Tests 1 GPU batch size 128 FP16."""
# params = self._shared_params()._replace(
# batch_size=128, use_fp16=True, trt_mode='FP16')
# self._run_benchmark(params)
# Test with batch size 16 to compare with native TF GPU implementation and
# XLA.
# def benchmark_fp16_trt_synth_forward_batch16(self):
# """Tests 1 GPU batch size 16 FP16."""
# params = self._shared_params()._replace(
# batch_size=16, use_fp16=True, trt_mode='FP16')
# self._run_benchmark(params)
class Resnet50Benchmarks(BenchmarkBase):
""""Benchmark resnet50 configurations."""
def _shared_params(self):
"""Returns shared parameters for all ResNet50 benchmarks."""
return BenchmarkBase._shared_params(self)._replace(
model='resnet50', batch_size=128, distortions=False,
optimizer='momentum')
def _shared_params_fp16(self):
"""Returns shared parameters for all ResNet50 FP16 benchmarks."""
return BenchmarkBase._shared_params(self)._replace(
model='resnet50',
batch_size=256,
distortions=False,
use_fp16=True,
optimizer='momentum',
loss_type_to_report='base_loss',
compute_lr_on_cpu=True,
single_l2_loss_op=True
)
def benchmark_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data."""
params = self._shared_params()._replace(num_gpus=1)
self._run_benchmark(params)
def benchmark_fake_1gpu_gpuparams(self):
"""Tests 1 gpu with fake data."""
params = self._shared_params()._replace(
num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet')
self._run_benchmark(params)
def benchmark_synth_1gpu_max_batch_size(self):
"""Finds largest batch size that can be run with 1 gpu using synth data."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server')
self._binary_search_batch_size(params, init_batch_size=128)
def benchmark_synth_4gpu_gpureplicated(self):
"""Tests 4 gpu with synthetic data with parameters replicated."""
params = self._shared_params()._replace(
num_gpus=4,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2)
self._run_benchmark(params)
def benchmark_synth_8gpu_gpureplicated(self):
"""Tests 8 gpu with synthetic data with parameters replicated."""
params = self._shared_params()._replace(
num_gpus=8,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2)
self._run_benchmark(params)
def benchmark_fake_8gpu_gpureplicated(self):
"""Tests 8 gpu with fake data with parameters replicated."""
params = self._shared_params()._replace(
num_gpus=8,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2)
self._run_benchmark(params)
# FP16 mixed-precision tests.
def benchmark_fp16_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data with parameters on the gpu."""
params = self._shared_params_fp16()._replace(
num_gpus=1, variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_fp16_synth_1gpu_gpuparams_batch128(self):
"""Tests 1 gpu with synthetic data with parameters on the gpu."""
params = self._shared_params_fp16()._replace(
num_gpus=1, batch_size=128, variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_fp16_synth_4gpu_gpureplicated(self):
"""Tests 4 gpu with synthetic data with nccl and all_reduce."""
params = self._shared_params_fp16()._replace(
num_gpus=4,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2)
self._run_benchmark(params)
def benchmark_fp16_synth_8gpu_gpureplicated(self):
"""Tests 8 gpu with synthetic with nccl and all_reduce."""
params = self._shared_params_fp16()._replace(
num_gpus=8,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2)
self._run_benchmark(params)
def benchmark_fp16_fake_1gpu_gpuparams(self):
"""Tests 1 gpus with fake data."""
params = self._shared_params_fp16()._replace(
num_gpus=1,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_fp16_fake_8gpu_gpureplicated(self):
"""Tests 8 gpus with fake data."""
params = self._shared_params_fp16()._replace(
num_gpus=8,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2)
self._run_benchmark(params)
def benchmark_fp16_fakedistort_8gpu_gpureplicated(self):
"""Tests 8 gpus with fake distorted data."""
params = self._shared_params_fp16()._replace(
num_gpus=8,
data_dir=self.fake_data_dir,
data_name='imagenet',
distortions=True,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2)
self._run_benchmark(params)
# XLA versions of Resnet50 tests only for single GPU.
def benchmark_xla_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data with XLA."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server', xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with fp16, synthetic data with XLA."""
params = self._shared_params_fp16()._replace(
num_gpus=1, variable_update='parameter_server', xla=True)
self._run_benchmark(params)
# Test does not run as part of continuous testing on guitar.
def benchmark_ng_xla_batch64_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with XLA, synth data, and batch 64."""
params = self._shared_params()._replace(
num_gpus=1, batch_size=64, variable_update='parameter_server', xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_batch64_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with fp16, XLA, synth data, and batch 64."""
params = self._shared_params_fp16()._replace(
num_gpus=1,
batch_size=64,
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with fp16, XLA, and synth data."""
params = self._shared_params_fp16()._replace(
num_gpus=1,
batch_size=128,
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
def benchmark_xla_synth_1gpu_max_batch_size(self):
"""Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server', xla=True)
self._binary_search_batch_size(params, init_batch_size=128)
def benchmark_xla_real_1gpu_gpuparams(self):
"""Tests 1 gpu with real data with XLA."""
params = self._shared_params()._replace(
num_gpus=1,
data_dir=self.data_dir,
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
# Test does not run as part of continuous testing.
def benchmark_xla_fake_1gpu_gpuparams(self):
"""Tests 1 gpu with fake data with XLA."""
params = self._shared_params()._replace(
num_gpus=1,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
# Test does not run as part of continuous testing.
def benchmark_xla_fakedistort_1gpu_gpuparams(self):
"""Tests 1 gpu with fake distorted data with XLA."""
params = self._shared_params()._replace(
num_gpus=1,
data_dir=self.fake_data_dir,
data_name='imagenet',
distortions=True,
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
class Resnet50v15Benchmarks(BenchmarkBase):
""""Benchmark various ResNet50V1.5 configurations.
ResNetV1.5 differs from V1 in stride 2 is used in the first 3x3 convolution of
each block instead of the first 1x1 convolution.
"""
def _shared_params_fp16(self):
"""Returns shared parameters for all ResNet50v1.5 FP16 benchmarks."""
return BenchmarkBase._shared_params(self)._replace(
model='resnet50_v1.5',
batch_size=256,
distortions=False,
use_fp16=True,
optimizer='momentum',
loss_type_to_report='base_loss',
compute_lr_on_cpu=True,
single_l2_loss_op=True
)
def benchmark_fp16_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data."""
params = self._shared_params_fp16()._replace(num_gpus=1)
self._run_benchmark(params)
def benchmark_fp16_batch256_synth_8gpu_gpuparams(self):
"""Tests 8 gpus with synthetic data at batch 256."""
params = self._shared_params_fp16()._replace(num_gpus=8)
self._run_benchmark(params)
def benchmark_fp16_batch128_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data at batch 128 (useful for small GPUs)."""
params = self._shared_params_fp16()._replace(num_gpus=1, batch_size=128)
self._run_benchmark(params)
def benchmark_fp16_fake_1gpu_gpuparams(self):
"""Tests 1 gpu with fake data."""
params = self._shared_params_fp16()._replace(
num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet')
self._run_benchmark(params)
def benchmark_fp16_synth_8gpu_gpureplicated(self):
"""Tests 8 gpu with synthetic data with parameters replicated."""
params = self._shared_params_fp16()._replace(
num_gpus=8,
num_batches=200,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2)
self._run_benchmark(params)
def benchmark_fp16_fake_8gpu_gpureplicated(self):
"""Tests 8 gpu with fake data with parameters replicated."""
params = self._shared_params_fp16()._replace(
num_gpus=8,
num_batches=200,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2)
self._run_benchmark(params)
# XLA versions of Resnet50v1.5 tests.
def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with fp16, synthetic data with XLA."""
params = self._shared_params_fp16()._replace(num_gpus=1, xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with fp16, batch128, synthetic data with XLA."""
params = self._shared_params_fp16()._replace(
num_gpus=1, batch_size=128, xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data."""
params = self._shared_params_fp16()._replace(num_gpus=1, xla_compile=True)
self._run_benchmark(params)
def benchmark_fp16_xla_compile_batch128_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data at batch 128 (useful for small GPUs)."""
params = self._shared_params_fp16()._replace(
num_gpus=1, num_batches=200, batch_size=128, xla_compile=True)
self._run_benchmark(params)
def benchmark_fp16_xla_batch256_synth_8gpu_gpuparams(self):
"""Tests 8 gpu with synthetic data and xla autojit."""
params = self._shared_params_fp16()._replace(
num_gpus=8, num_batches=200, batch_size=256, xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_compile_fake_1gpu_gpuparams(self):
"""Tests 1 gpu with fake data."""
params = self._shared_params_fp16()._replace(
num_gpus=1,
data_dir=self.fake_data_dir,
data_name='imagenet',
xla_compile=True)
self._run_benchmark(params)
def benchmark_fp16_xla_compile_synth_8gpu_gpureplicated(self):
"""Tests 8 gpu with synthetic data with parameters replicated."""
params = self._shared_params_fp16()._replace(
num_gpus=8,
num_batches=200,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2,
xla_compile=True)
self._run_benchmark(params)
def benchmark_fp16_xla_synth_8gpu_gpureplicated(self):
"""Tests 8 gpu with synthetic data with parameters replicated."""
params = self._shared_params_fp16()._replace(
num_gpus=8,
num_batches=200,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2,
xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_compile_fake_8gpu_gpureplicated(self):
"""Tests 8 gpu with fake data with parameters replicated."""
params = self._shared_params_fp16()._replace(
num_gpus=8,
num_batches=200,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2,
xla_compile=True)
self._run_benchmark(params)
class Vgg16Benchmarks(BenchmarkBase):
""""Benchmark various vgg16 configurations."""
def _shared_params(self):
"""Returns shared parameters for all vgg16 benchmarks."""
return BenchmarkBase._shared_params(self)._replace(
model='vgg16', batch_size=128, distortions=False)
def benchmark_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data with parameters on gpu."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_fp16_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data with parameters on gpu."""
params = self._shared_params()._replace(
num_gpus=1, use_fp16=True, variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_synth_8gpu_gpureplicated(self):
"""Tests 8 gpu with synthetic data with parameters replicated."""
params = self._shared_params()._replace(
num_gpus=8,
all_reduce_spec='nccl',
variable_update='replicated',
compact_gradient_transfer=False,
gradient_repacking=2)
self._run_benchmark(params)
# XLA versions of VGG16 tests only for single GPU.
def benchmark_xla_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data and XLA."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server', xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with fp16, synthetic data, and XLA."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server', xla=True, use_fp16=True)
self._run_benchmark(params)
# Test does not run as part of continuous testing.
def benchmark_xla_fake_1gpu_gpuparams(self):
"""Tests 1 gpu with fake data and XLA."""
params = self._shared_params()._replace(
num_gpus=1,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
def benchmark_xla_real_1gpu_gpuparams(self):
"""Tests 1 gpu with real data and XLA."""
params = self._shared_params()._replace(
num_gpus=1,
data_dir=self.data_dir,
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
class TrivialBenchmarks(BenchmarkBase):
""""Benchmarks for trivial model.
The purpose of these tests is to verify the upper bound for the input
pipeline. Fake data creates an upperbound on the input pipeline throughput.
"""
def _shared_params(self):
"""Returns shared parameters for all trivial benchmarks."""
return BenchmarkBase._shared_params(self)._replace(
model='trivial',
num_gpus=8,
distortions=False,
variable_update='independent',
data_dir=self.fake_data_dir)
def benchmark_fake_64batch(self):
params = self._shared_params()._replace(batch_size=64, data_name='imagenet')
self._run_benchmark(params)
def benchmark_fake_128batch(self):
params = self._shared_params()._replace(
batch_size=128, data_name='imagenet')
self._run_benchmark(params)
def benchmark_fake_256batch(self):
params = self._shared_params()._replace(
batch_size=256, data_name='imagenet')
self._run_benchmark(params)
def benchmark_fakedistort_128batch(self):
params = self._shared_params()._replace(
batch_size=128, data_name='imagenet', distortions=True)
self._run_benchmark(params)
class AlexnetBenchmarks(BenchmarkBase):
""""Benchmarks for alexnet."""
def _shared_params(self):
"""Returns shared parameters for all alexnet benchmarks."""
return BenchmarkBase._shared_params(self)._replace(
model='alexnet', batch_size=512, distortions=False)
def benchmark_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data with parameters on gpu."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_fp16_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data with parameters on gpu."""
params = self._shared_params()._replace(
num_gpus=1, use_fp16=True, variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_synth_8gpu_gpureplicated(self):
"""Tests 8 gpus with synthetic data with parameters replicated."""
params = self._shared_params()._replace(
num_gpus=8,
variable_update='replicated',
all_reduce_spec='nccl',
compact_gradient_transfer=False,
gradient_repacking=2)
self._run_benchmark(params)
def benchmark_fake_8gpu_gpureplicated(self):
"""Tests 8 gpus with fake data with parameters replicated."""
params = self._shared_params()._replace(
num_gpus=8,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='replicated',
all_reduce_spec='nccl',
compact_gradient_transfer=False,
gradient_repacking=2)
self._run_benchmark(params)
# XLA Benchmark tests for AlexNet.
def benchmark_xla_synth_1gpuparams(self):
"""Tests 1 gpu with synthetic data and XLA."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server', xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with fp16, synthetic data and XLA."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server', xla=True, use_fp16=True)
self._run_benchmark(params)
# Test does not run as part of continuous testing.
def benchmark_xla_fake_1gpuparams(self):
"""Tests 1 gpu with fake data and XLA."""
params = self._shared_params()._replace(
num_gpus=1,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
def benchmark_xla_real_1gpuparams(self):
"""Tests 1 gpu with real data and XLA."""
params = self._shared_params()._replace(
num_gpus=1,
data_dir=self.data_dir,
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
class InceptionV3Benchmarks(BenchmarkBase):
""""Benchmark for InceptionV3."""
def _shared_params(self):
"""Returns shared parameters for all InceptionV3 benchmarks."""
return BenchmarkBase._shared_params(self)._replace(
model='inception3', batch_size=64, distortions=False)
def benchmark_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_fp16_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic data."""
params = self._shared_params()._replace(
num_gpus=1, use_fp16=True, variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_synth_1gpu_max_batch_size(self):
"""Finds largest batch size that can be run with 1 gpu using synth data."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server')
self._binary_search_batch_size(params, init_batch_size=128)
def benchmark_xla_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with synthetic and XLA."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server', xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
"""Tests 1 gpu with fp16, XLA and synthetic data."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server', xla=True, use_fp16=True)
self._run_benchmark(params)
def benchmark_xla_synth_1gpu_max_batch_size(self):
"""Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
params = self._shared_params()._replace(
num_gpus=1, variable_update='parameter_server', xla=True)
self._binary_search_batch_size(params, init_batch_size=128)
# Test does not run as part of continuous testing.
def benchmark_xla_fake_1gpu_gpuparams(self):
"""Tests 1 gpu with fake data with XLA."""
params = self._shared_params()._replace(
num_gpus=1,
data_dir=self.fake_data_dir,
data_name='imagenet',
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
def benchmark_xla_real_1gpu_gpuparams(self):
"""Tests 1 gpu with real data with XLA."""
params = self._shared_params()._replace(
num_gpus=1,
data_dir=self.data_dir,
variable_update='parameter_server',
xla=True)
self._run_benchmark(params)
class NcfBenchmarks(BenchmarkBase):
"""Benchmarks for neural collaborative filtering."""
def _shared_params(self):
return BenchmarkBase._shared_params(self)._replace(
model='ncf', batch_size=64*1024, num_gpus=1, num_warmup_batches=1)
def benchmark_synth_1gpu_gpuparams(self):
params = self._shared_params()._replace(variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_fp16_synth_1gpu_gpuparams(self):
params = self._shared_params()._replace(
variable_update='parameter_server', use_fp16=True)
self._run_benchmark(params)
def benchmark_xla_synth_1gpu_gpuparams(self):
params = self._shared_params()._replace(
variable_update='parameter_server', xla=True)
self._run_benchmark(params)
def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
params = self._shared_params()._replace(
variable_update='parameter_server', xla=True, use_fp16=True)
self._run_benchmark(params)
def benchmark_xla_compile_synth_1gpu_gpuparams(self):
params = self._shared_params()._replace(
variable_update='parameter_server', xla_compile=True)
self._run_benchmark(params)
def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
params = self._shared_params()._replace(
variable_update='parameter_server', xla_compile=True, use_fp16=True)
self._run_benchmark(params)
class DeepSpeech2Benchmarks(BenchmarkBase):
"""Benchmarks for DeepSpeech2 model."""
def _shared_params(self):
return BenchmarkBase._shared_params(self)._replace(
model='deepspeech2', batch_size=32, num_gpus=1, data_name='librispeech')
def benchmark_synth_1gpu_gpuparams(self):
params = self._shared_params()._replace(variable_update='parameter_server')
self._run_benchmark(params)
def benchmark_xla_synth_1gpu_gpuparams(self):
params = self._shared_params()._replace(
variable_update='parameter_server', xla=True)
self._run_benchmark(params)
def benchmark_xla_compile_synth_1gpu_gpuparams(self):
params = self._shared_params()._replace(
variable_update='parameter_server', xla_compile=True)
self._run_benchmark(params)
class SsdBenchmarks(BenchmarkBase):
"""Benchmarks for SSD model."""
def _cudnn_version(self):
if sys.platform == 'win32':
return None
lib = ctypes.cdll.LoadLibrary(None)
if hasattr(lib, 'cudnnGetErrorString'):
version = lib.cudnnGetVersion()
return version
return None
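# For cuDNN 7.x, cudnnGetVersion() encodes MAJOR*1000 + MINOR*100 + PATCHLEVEL,
# so the 7300 threshold checked in _shared_params below corresponds to cuDNN
# 7.3.0.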
def _shared_params(self):
cudnn_version = self._cudnn_version()
if cudnn_version is None or cudnn_version < 7300:
raise RuntimeError(
'Needs at least cuDNN 7.3 to work with fp16 (b/112048183). '
'Build with --define=use_experimental_cudnn=1')
return BenchmarkBase._shared_params(self)._replace(
# TODO(b/115672206): Replace backbone model and data dir with replicated
# placer location for better performance.
backbone_model_path=platforms_util.get_ssd_backborn_model_file(), # pylint: disable=line-too-long
data_dir=platforms_util.get_ssd_backboard_data_dir(),
batch_size=128,
data_name='coco',
model='ssd300',
num_batches=10,
num_warmup_batches=1,
num_gpus=1,
optimizer='momentum',
momentum=0.9,
weight_decay=5e-4,
loss_type_to_report='base_loss',
single_l2_loss_op=True,
compute_lr_on_cpu=True,
)
def benchmark_xla_compile_real_1gpu_gpuparams(self):
params = self._shared_params()._replace(
num_gpus=1,
xla_compile=True,
)
self._run_benchmark(params)
def benchmark_real_1gpu_gpuparams(self):
params = self._shared_params()._replace(num_gpus=1,)
self._run_benchmark(params)
def benchmark_xla_compile_fp16_real_1gpu_gpuparams(self):
params = self._shared_params()._replace(
num_gpus=1, xla_compile=True, use_fp16=True)
self._run_benchmark(params)
def benchmark_fp16_real_1gpu_gpuparams(self):
params = self._shared_params()._replace(num_gpus=1, use_fp16=True)
self._run_benchmark(params)
def benchmark_xla_compile_real_8gpu_gpuparams(self):
params = self._shared_params()._replace(
num_gpus=8,
xla_compile=True,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2,
num_batches=50,
)
self._run_benchmark(params)
def benchmark_real_8gpu_gpuparams(self):
params = self._shared_params()._replace(
num_gpus=8,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2,
num_batches=50,
)
self._run_benchmark(params)
def benchmark_xla_compile_fp16_real_8gpu_gpuparams(self):
params = self._shared_params()._replace(
num_gpus=8,
xla_compile=True,
use_fp16=True,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2,
num_batches=50,
)
self._run_benchmark(params)
def benchmark_fp16_real_8gpu_gpuparams(self):
params = self._shared_params()._replace(
num_gpus=8,
use_fp16=True,
variable_update='replicated',
all_reduce_spec='nccl',
gradient_repacking=2,
num_batches=50,
)
self._run_benchmark(params)
if __name__ == '__main__':
tf.disable_v2_behavior()
tf.test.main()
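# Illustrative sketch (not part of the original file): the same benchmarks can
# be driven as a library, mirroring what BenchmarkBase._run_benchmark does
# above. Parameter values are examples only.
#
#   params = benchmark_cnn.make_params(model='resnet50', num_gpus=1,
#                                      batch_size=64, num_batches=20,
#                                      distortions=False)
#   params = benchmark_cnn.setup(params)
#   stats = benchmark_cnn.BenchmarkCNN(params).run()
#   print(stats.get('images_per_sec'))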
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions related to MLPerf compliance.
MLPerf requires submissions to log what the benchmark does, in order to verify
that the benchmark meets the MLPerf requirements. This module contains a global
object `logger` that is used by other files to log what tf_cnn_benchmarks does
for compliance.
By default, `logger` does nothing, as the MLPerf compliance logs are verbose and
unnecessary if one is not concerned about MLPerf compliance. The logger can be
enabled by using the `mlperf_logger` context manager.
To enable the logger with `mlperf_logger`, the MLPerf compliance library at
https://github.com/mlperf/training/tree/master/compliance is required. If
the logger is not enabled, the library is not needed.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import namedtuple
import contextlib
import os
import sys
import tensorflow.compat.v1 as tf
# pylint: disable=g-import-not-at-top
try:
# Not all users have the MLPerf compliance library, so we don't want to
# unconditionally crash if these imports fail.
from mlperf_compliance import mlperf_log
from mlperf_compliance import resnet_log_helper
from mlperf_compliance import tags
import_successful = True
except ImportError:
# The logger cannot be enabled in this case since the MLPerf library isn't
# found. We return empty strings from the `tags` attribute so that
# the benchmark can still run without crashing. These empty tags are passed
# to an instance of `NullMlPerfLogger`, which does not log anything and
# ignores the tag values.
class _Tags(object):
def __getattr__(self, item):
return ''
tags = _Tags()
import_successful = False
# pylint: enable=g-import-not-at-top
_ModelInfo = namedtuple('_ModelInfo', ['print_fn', 'tag_set',
'mlperf_model_name'])
_MLPERF_LOG_PREFIX = ':::MLPv0.5.0'
class MlPerfLogger(object):
"""Logs various aspects about a benchmark run for MLPerf compliance."""
def __init__(self, model):
self._root_dir = os.path.split(os.path.abspath(__file__))[0]
mlperf_log.ROOT_DIR_RESNET = self._root_dir
mlperf_log.ROOT_DIR_SSD = self._root_dir
self.model = model
model_to_info = {
'resnet50_v1.5': _ModelInfo(mlperf_log.resnet_print,
mlperf_log.RESNET_TAG_SET, tags.RESNET),
'ssd300': _ModelInfo(mlperf_log.ssd_print, mlperf_log.SSD_TAG_SET,
tags.SSD)
}
try:
self._log_fn, self.tag_set, self.mlperf_model_name = model_to_info[model]
except KeyError:
raise ValueError('--ml_perf_compliance_logging is only compatible when '
'--model is one of the following: ' +
', '.join(model_to_info.keys()))
def log(self, key, value=None, stack_offset=2):
if key in self.tag_set:
self._log_fn(key, value, stack_offset)
else:
print('Ignoring MLPerf logging item key=%s, value=%s for model %s' %
(key, value, self.model))
def log_deferred_tensor_value(self, key, tensor_value, global_step,
stack_offset=2, every_n=1):
"""Logs the value of a tensor when the graph is run."""
caller = '(%s)' % mlperf_log.get_caller(stack_offset, self._root_dir)
def create_print_op():
return tf.print(_MLPERF_LOG_PREFIX, self.mlperf_model_name,
tf.timestamp(), caller, key,
': { "deferred": true, "value":', tensor_value, '}',
output_stream=sys.stdout)
maybe_print = tf.cond(tf.equal(global_step % every_n, 0), create_print_op,
tf.no_op)
with tf.control_dependencies([maybe_print]):
return tf.identity(tensor_value)
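# Illustrative sketch (hypothetical tensors): callers must use the returned
# identity in place of the original tensor so the deferred print op actually
# runs, e.g.
#   learning_rate = logger.log_deferred_tensor_value(
#       tags.OPT_LR, learning_rate, global_step, every_n=10)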
def log_max_pool(self, input_tensor, output_tensor):
if self.model == 'resnet50_v1.5':
resnet_log_helper.log_max_pool(input_tensor, output_tensor)
def log_begin_block(self, input_tensor, block_type):
if self.model == 'resnet50_v1.5':
resnet_log_helper.log_begin_block(input_tensor, block_type)
def log_end_block(self, output_tensor):
if self.model == 'resnet50_v1.5':
resnet_log_helper.log_end_block(output_tensor)
def log_projection(self, input_tensor, output_tensor):
if self.model == 'resnet50_v1.5':
resnet_log_helper.log_projection(input_tensor, output_tensor)
def log_conv2d(self, input_tensor, output_tensor, stride_height, stride_width,
filters, initializer, use_bias):
"""Log a conv2d call."""
if self.model == 'resnet50_v1.5':
assert stride_height == stride_width, (
'--ml_perf_compliance_logging does not support convolutions where '
'the stride height is not equal to the stride width. '
'stride_height=%d, stride_width=%d' % (stride_height, stride_width))
if isinstance(initializer, tf.truncated_normal_initializer) or (
isinstance(initializer, tf.variance_scaling_initializer) and
initializer.distribution == 'truncated_normal'):
initializer = tags.TRUNCATED_NORMAL
elif (isinstance(initializer, tf.glorot_uniform_initializer) or
initializer is None):
initializer = 'glorot_uniform'
resnet_log_helper.log_conv2d(input_tensor, output_tensor, stride_width,
filters, initializer, use_bias)
def log_batch_norm(self, input_tensor, output_tensor, momentum, epsilon,
center, scale, training):
if self.model == 'resnet50_v1.5':
resnet_log_helper.log_batch_norm(input_tensor, output_tensor, momentum,
epsilon, center, scale, training)
def log_train_epochs(self, num_epochs):
"""Logs all the TRAIN_EPOCHs log lines."""
num_epochs_int = int(num_epochs)
for i in range(num_epochs_int):
# MLPerf allows us to print all the train epochs at once instead of
# printing them as we do them.
self.log(key=mlperf_log.TRAIN_EPOCH, value=i, stack_offset=3)
if num_epochs_int != num_epochs:
value = (str(num_epochs_int) +
', but this epoch only has {}% of the examples of a normal epoch'
.format(100 * (num_epochs - num_epochs_int)))
self.log(key=mlperf_log.TRAIN_EPOCH, value=value, stack_offset=3)
def log_input_resize_aspect_preserving(self, height, width, scale_factor):
assert height == width, (
'--ml_perf_compliance_logging does not support models with nonsquare '
'images. Cannot process image with height=%d and width=%d' %
(height, width))
self.log(key=tags.INPUT_RESIZE_ASPECT_PRESERVING,
value={'min': int(height * scale_factor)})
def log_eval_epoch(self, tag, global_step, batch_size, stack_offset=2):
if self.model == 'resnet50_v1.5':
self.log(key=tag, stack_offset=stack_offset+1)
elif self.model == 'ssd300':
epoch = int(global_step * batch_size / 118287)
self.log(key=tag, value=epoch, stack_offset=stack_offset+1)
def log_eval_accuracy(self, accuracy, global_step, batch_size,
examples_per_epoch, stack_offset=2):
"""Logs eval accuracy."""
epoch = int(global_step * batch_size / examples_per_epoch)
eval_accuracy = {'epoch': epoch, 'value': accuracy}
eval_iteration_accuracy = {'iteration': global_step, 'value': accuracy}
self.log(key=tags.EVAL_ACCURACY, value=eval_accuracy,
stack_offset=stack_offset+1)
self.log(key=tags.EVAL_ITERATION_ACCURACY,
value=eval_iteration_accuracy,
stack_offset=stack_offset+1)
def _empty_fn(*args, **kwargs):
del args, kwargs
class NullMlPerfLogger(object):
"""A version of `MlPerfLogger` that does not log anything.
This class has the same interface as `MlPerfLogger`, but does not actually do
anything. This is used when logging is disabled, which is the default
behavior.
"""
def __getattr__(self, item):
return _empty_fn
def log_deferred_tensor_value(self, key, tensor_value, *args, **kwargs):
del key, args, kwargs
return tensor_value
# A global singleton logger. By default, it's the null logger but can be
# switched to an MlPerfLogger with `mlperf_logger()`.
logger = NullMlPerfLogger()
@contextlib.contextmanager
def mlperf_logger(use_mlperf_logger, model):
"""Optionally enable the mlperf logger.
If `use_mlperf_logger` is True, sets the `logger` global variable to an
instance of MlPerfLogger that will print logs for MLPerf compliance. If
`use_mlperf_logger` is False, does nothing.
Args:
use_mlperf_logger: If True, enables the mlperf logger. If False, this
function does nothing.
model: The model that will be logged. Required, because different models
must log different things for MLPerf compliance.
Yields:
Nothing.
Raises:
ImportError: If `use_mlperf_logger` is True but the MLPerf compliance
library cannot be imported
"""
global logger
if use_mlperf_logger:
if not import_successful:
raise ImportError('Failed to import MLPerf compliance library, which is '
'required when --ml_perf_compliance_logging is '
'specified. Clone this repo and add this directory '
'https://github.com/mlperf/training/tree/master/'
'compliance to the PYTHONPATH environmental variable.')
logger_ = MlPerfLogger(model)
old_logger = logger
try:
logger = logger_
yield
finally:
logger = old_logger
else:
yield
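# Illustrative usage sketch (not part of the original module): other modules
# log through the global `logger`, so enabling compliance logging only requires
# wrapping the run in the context manager. The tag below is an example.
#
#   with mlperf_logger(use_mlperf_logger=True, model='resnet50_v1.5'):
#     logger.log(key=tags.RUN_START)
#     ...  # build and run the benchmark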
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains tests related to MLPerf.
Note this test only passes if the MLPerf compliance library is installed.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import Counter
import logging
import re
import six
import tensorflow.compat.v1 as tf
import benchmark_cnn
import datasets
import mlperf
import test_util
from models import model
from mlperf_compliance import mlperf_log
class _MlPerfTestModel(model.CNNModel):
"""A model to test the MLPerf compliance logging on."""
def __init__(self):
super(_MlPerfTestModel, self).__init__(
'mlperf_test_model', image_size=224, batch_size=2, learning_rate=1)
def add_inference(self, cnn):
assert cnn.top_layer.shape[1:] == (3, 224, 224)
cnn.conv(1, 1, 1, 1, 1, use_batch_norm=True)
cnn.mpool(1, 1, 1, 1, num_channels_in=1)
cnn.reshape([-1, 224 * 224])
cnn.affine(1, activation=None)
# Assert that the batch norm variables are filtered out for L2 loss.
variables = tf.global_variables() + tf.local_variables()
assert len(variables) > len(self.filter_l2_loss_vars(variables))
class MlPerfComplianceTest(tf.test.TestCase):
"""Tests the MLPerf compliance logs.
This serves as a quick check that we probably didn't break the compliance
logging. It is not meant to be as comprehensive as the official MLPerf
compliance checker will be.
"""
def setUp(self):
super(MlPerfComplianceTest, self).setUp()
benchmark_cnn.setup(benchmark_cnn.make_params())
# Map between regex and the number of times we expect to see that regex in the
# logs. Entries commented out with the comment FIXME indicate that
# tf_cnn_benchmarks currently fails compliance in that regard, and needs to be
# fixed to be MLPerf compliant.
EXPECTED_LOG_REGEXES = {
# Preprocessing tags
mlperf.tags.INPUT_ORDER: 2, # 1 for training, 1 for eval
# We pass --tf_random_seed=9876 in the test.
r'%s: 9876' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
# The Numpy random seed is hardcoded to 4321.
r'%s: 4321' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
r'%s: %d' % (mlperf.tags.PREPROC_NUM_TRAIN_EXAMPLES,
datasets.IMAGENET_NUM_TRAIN_IMAGES): 1,
r'%s: %d' % (mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES,
datasets.IMAGENET_NUM_VAL_IMAGES): 1,
mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES + '.*': 1,
mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV + '.*': 1,
mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE + '.*': 1,
mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE + '.*': 1,
mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS + '.*': 1,
mlperf.tags.INPUT_RANDOM_FLIP + '.*': 1,
r'%s: \[224, 224\].*' % mlperf.tags.INPUT_CENTRAL_CROP: 1,
r'%s: \[123.68, 116.78, 103.94\].*' % mlperf.tags.INPUT_MEAN_SUBTRACTION:
2,
r'%s: {"min": 256}.*' % mlperf.tags.INPUT_RESIZE_ASPECT_PRESERVING: 1,
# 1 for training, 1 for eval
r'%s: \[224, 224\].*' % mlperf.tags.INPUT_RESIZE: 2,
# Resnet model tags
mlperf.tags.MODEL_HP_BATCH_NORM + '.*': 2,
# 2 for training, 2 for eval. Although there's only 1 conv2d, each conv2d
# produces 2 logs.
mlperf.tags.MODEL_HP_CONV2D_FIXED_PADDING + '.*': 4,
mlperf.tags.MODEL_HP_RELU + '.*': 2,
mlperf.tags.MODEL_HP_INITIAL_MAX_POOL + '.*': 2,
mlperf.tags.MODEL_HP_DENSE + '.*': 4,
# Note that tags our test model does not emit, like MODEL_HP_SHORTCUT_ADD,
# are omitted here.
r'%s: "categorical_cross_entropy".*' % mlperf.tags.MODEL_HP_LOSS_FN: 1,
# 1 for training, 2 because the _MlPerfTestModel calls this when building
# the model for both training and eval
r'%s: true' % mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2: 3,
r'%s: 0.5.*' % mlperf.tags.MODEL_L2_REGULARIZATION: 1,
# Note we do not handle OPT_LR, since that is printed to stderr using
# tf.Print, which we cannot easily intercept.
# Other tags
'%s: "%s"' % (mlperf.tags.OPT_NAME, mlperf.tags.SGD_WITH_MOMENTUM): 1,
'%s: 0.5' % mlperf.tags.OPT_MOMENTUM: 1,
mlperf.tags.RUN_START: 1,
'%s: 2' % mlperf.tags.INPUT_BATCH_SIZE: 1,
mlperf.tags.TRAIN_LOOP: 1,
mlperf.tags.TRAIN_EPOCH + '.*': 1,
'%s: 2' % mlperf.tags.INPUT_SIZE: 2,
mlperf.tags.EVAL_START: 2,
mlperf.tags.EVAL_STOP: 2,
'%s: 6' % mlperf.tags.EVAL_SIZE: 2,
mlperf.tags.EVAL_ACCURACY + '.*': 2,
'%s: 2.0' % mlperf.tags.EVAL_TARGET: 2,
mlperf.tags.RUN_STOP + '.*': 1,
mlperf.tags.RUN_FINAL: 1
}
EXPECTED_LOG_REGEXES = Counter({re.compile(k): v for
k, v in EXPECTED_LOG_REGEXES.items()})
def testMlPerfCompliance(self):
string_io = six.StringIO()
handler = logging.StreamHandler(string_io)
data_dir = test_util.create_black_and_white_images()
try:
mlperf_log.LOGGER.addHandler(handler)
params = benchmark_cnn.make_params(data_dir=data_dir,
data_name='imagenet',
batch_size=2,
num_warmup_batches=0,
num_batches=2,
num_eval_batches=3,
eval_during_training_every_n_steps=1,
distortions=False,
weight_decay=0.5,
optimizer='momentum',
momentum=0.5,
stop_at_top_1_accuracy=2.0,
tf_random_seed=9876,
ml_perf=True)
with mlperf.mlperf_logger(use_mlperf_logger=True, model='resnet50_v1.5'):
bench_cnn = benchmark_cnn.BenchmarkCNN(params, model=_MlPerfTestModel())
bench_cnn.run()
logs = string_io.getvalue().splitlines()
log_regexes = Counter()
for log in logs:
for regex in self.EXPECTED_LOG_REGEXES:
if regex.search(log):
log_regexes[regex] += 1
if log_regexes != self.EXPECTED_LOG_REGEXES:
diff_counter = Counter(log_regexes)
diff_counter.subtract(self.EXPECTED_LOG_REGEXES)
differences = []
for regex in (k for k in diff_counter.keys() if diff_counter[k]):
found_count = log_regexes[regex]
expected_count = self.EXPECTED_LOG_REGEXES[regex]
differences.append(' For regex %s: Found %d lines matching but '
'expected to find %d' %
(regex.pattern, found_count, expected_count))
raise AssertionError('Logs did not match expected logs. Differences:\n'
'%s' % '\n'.join(differences))
finally:
mlperf_log.LOGGER.removeHandler(handler)
if __name__ == '__main__':
tf.disable_v2_behavior()
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Alexnet model configuration.
References:
Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton
ImageNet Classification with Deep Convolutional Neural Networks
Advances in Neural Information Processing Systems. 2012
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
from models import model
class AlexnetModel(model.CNNModel):
"""Alexnet cnn model."""
def __init__(self, params=None):
super(AlexnetModel, self).__init__(
'alexnet', 224 + 3, 512, 0.005, params=params)
def add_inference(self, cnn):
# Note: VALID requires padding the images by 3 in width and height
cnn.conv(64, 11, 11, 4, 4, 'VALID')
cnn.mpool(3, 3, 2, 2)
cnn.conv(192, 5, 5)
cnn.mpool(3, 3, 2, 2)
cnn.conv(384, 3, 3)
cnn.conv(384, 3, 3)
cnn.conv(256, 3, 3)
cnn.mpool(3, 3, 2, 2)
cnn.reshape([-1, 256 * 6 * 6])
cnn.affine(4096)
cnn.dropout()
cnn.affine(4096)
cnn.dropout()
class AlexnetCifar10Model(model.CNNModel):
"""Alexnet cnn model for cifar datasets.
The model architecture follows the one defined in the tensorflow tutorial
model.
Reference model: tensorflow/models/tutorials/image/cifar10/cifar10.py
Paper: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf
"""
def __init__(self, params=None):
super(AlexnetCifar10Model, self).__init__(
'alexnet', 32, 128, 0.1, params=params)
def add_inference(self, cnn):
cnn.conv(64, 5, 5, 1, 1, 'SAME', stddev=5e-2)
cnn.mpool(3, 3, 2, 2, mode='SAME')
cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
cnn.conv(64, 5, 5, 1, 1, 'SAME', bias=0.1, stddev=5e-2)
cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
cnn.mpool(3, 3, 2, 2, mode='SAME')
shape = cnn.top_layer.get_shape().as_list()
flat_dim = shape[1] * shape[2] * shape[3]
cnn.reshape([-1, flat_dim])
cnn.affine(384, stddev=0.04, bias=0.1)
cnn.affine(192, stddev=0.04, bias=0.1)
def get_learning_rate(self, global_step, batch_size):
num_examples_per_epoch = 50000
num_epochs_per_decay = 100
decay_steps = (
num_epochs_per_decay * num_examples_per_epoch // batch_size)
decay_factor = 0.1
return tf.train.exponential_decay(
self.learning_rate,
global_step,
decay_steps,
decay_factor,
staircase=True)
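# Worked example (values follow the constants above): with batch_size=128 the
# learning rate decays by a factor of 0.1 every
# num_epochs_per_decay * num_examples_per_epoch // batch_size
# = 100 * 50000 // 128 = 39062 steps, i.e. roughly every 100 CIFAR-10 epochs.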