Unverified commit acc6f6d7, authored by Toby Boyd and committed by GitHub

Combined imagenet and cifar-10 estimator tests (#6672)

* Combined imagenet and cifar-10 benchmarks

* Comments and epochs_between_evals.

* Added tuned tests and cleaned up benchmark flags

* Fix names.

* Return results and add images/sec hook.

* updated doc strings for return values.

* 128 to 256 batch for FP16 test

* added more doc strings to fix lint.
parent 67c403fc
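The new classes below are ordinary tf.test.Benchmark subclasses, so any benchmark_* method can also be driven from a short script. A minimal sketch, assuming the module lives at official/resnet/estimator_benchmark.py (the file path is not shown in this diff) and the CIFAR-10 binaries sit under /data:

# Illustrative driver only; the module path and data locations are assumptions.
from official.resnet import estimator_benchmark

benchmark = estimator_benchmark.Resnet56EstimatorAccuracy(
    output_dir='/tmp/benchmark_logs',  # model_dir and event logs land under here
    root_data_dir='/data')             # expects /data/cifar-10-batches-bin
# Trains ResNet-56 for 182 epochs on 1 GPU and reports top-1 accuracy
# against the 0.926-0.938 pass bounds.
benchmark.benchmark_graph_1_gpu()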
@@ -13,7 +13,6 @@
# limitations under the License.
# ==============================================================================
"""Executes Estimator benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

@@ -26,14 +25,271 @@ from absl.testing import flagsaver
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.resnet import cifar10_main as cifar_main
from official.resnet import imagenet_main
from official.utils.logs import hooks

IMAGENET_DATA_DIR_NAME = 'imagenet'
CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin'

FLAGS = flags.FLAGS


class EstimatorBenchmark(tf.test.Benchmark):
  """Base class to hold methods common to test classes in the module.

  Code under test for Estimator models (ResNet50 and 56) reports mostly the
  same data and requires the same FLAG setup.
  """
local_flags = None
def __init__(self, output_dir=None, default_flags=None, flag_methods=None):
if not output_dir:
output_dir = '/tmp'
self.output_dir = output_dir
self.default_flags = default_flags or {}
self.flag_methods = flag_methods or {}
def _get_model_dir(self, folder_name):
"""Returns directory to store info, e.g. saved model and event log."""
return os.path.join(self.output_dir, folder_name)
def _setup(self):
"""Sets up and resets flags before each test."""
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)
if EstimatorBenchmark.local_flags is None:
for flag_method in self.flag_methods:
flag_method()
      # Parses a dummy argv (['foo']) to load flag defaults, which are then
      # overridden below; the argv list cannot be empty.
flags.FLAGS(['foo'])
# Overrides flag values with defaults for the class of tests.
for k, v in self.default_flags.items():
setattr(FLAGS, k, v)
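      # Snapshot the fully configured flag state so later tests in this class
      # restore identical defaults instead of re-parsing.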
saved_flag_values = flagsaver.save_flag_values()
EstimatorBenchmark.local_flags = saved_flag_values
else:
flagsaver.restore_flag_values(EstimatorBenchmark.local_flags)
def _report_benchmark(self,
stats,
wall_time_sec,
top_1_max=None,
top_1_min=None):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from estimator models with known entries.
      wall_time_sec: the duration of the benchmark execution, in seconds.
top_1_max: highest passing level for top_1 accuracy.
top_1_min: lowest passing level for top_1 accuracy.
"""
examples_per_sec_hook = None
for hook in stats['train_hooks']:
if isinstance(hook, hooks.ExamplesPerSecondHook):
examples_per_sec_hook = hook
break
eval_results = stats['eval_results']
metrics = []
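    # Accuracy metrics are optional: throughput-only benchmarks delete them
    # from eval_results before calling this method.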
if 'accuracy' in eval_results:
metrics.append({'name': 'accuracy_top_1',
'value': eval_results['accuracy'].item(),
'min_value': top_1_min,
'max_value': top_1_max})
if 'accuracy_top_5' in eval_results:
metrics.append({'name': 'accuracy_top_5',
'value': eval_results['accuracy_top_5'].item()})
if examples_per_sec_hook:
exp_per_second_list = examples_per_sec_hook.current_examples_per_sec_list
# ExamplesPerSecondHook skips the first 10 steps.
exp_per_sec = sum(exp_per_second_list) / (len(exp_per_second_list))
metrics.append({'name': 'exp_per_second',
'value': exp_per_sec})
self.report_benchmark(
iters=eval_results['global_step'],
wall_time=wall_time_sec,
metrics=metrics)
class Resnet50EstimatorAccuracy(EstimatorBenchmark):
"""Benchmark accuracy tests for ResNet50 w/ Estimator."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""Benchmark accuracy tests for ResNet50 w/ Estimator.
Args:
      output_dir: directory where output such as log files is written.
      root_data_dir: directory under which to look for the dataset.
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
flag_methods = [imagenet_main.define_imagenet_flags]
self.data_dir = os.path.join(root_data_dir, IMAGENET_DATA_DIR_NAME)
super(Resnet50EstimatorAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def benchmark_graph_8_gpu(self):
"""Test 8 GPUs graph mode."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
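    # Evaluate every 10 epochs across the 90-epoch run rather than only at the end.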
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
FLAGS.dtype = 'fp32'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu(self):
"""Test FP16 8 GPUs graph mode."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_8_gpu')
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = imagenet_main.run_imagenet(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
self._report_benchmark(stats,
wall_time_sec,
top_1_min=0.762,
top_1_max=0.766)
class Resnet50EstimatorBenchmark(EstimatorBenchmark):
"""Benchmarks for ResNet50 using Estimator."""
local_flags = None
def __init__(self, output_dir=None, default_flags=None):
flag_methods = [imagenet_main.define_imagenet_flags]
super(Resnet50EstimatorBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
flag_methods=flag_methods)
def benchmark_graph_fp16_1_gpu(self):
"""Benchmarks graph fp16 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_1_gpu')
FLAGS.batch_size = 128
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_1_gpu_tweaked(self):
"""Benchmarks graph fp16 1 gpu tweaked."""
self._setup()
FLAGS.num_gpus = 1
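    # Tuning flags: 'gpu_private' dedicates threads to the GPU, and intra-op
    # parallelism is capped at 1.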
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_1_gpu_tweaked')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_1_gpu(self):
"""Benchmarks graph 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
FLAGS.batch_size = 128
FLAGS.dtype = 'fp32'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_8_gpu(self):
"""Benchmarks graph 8 gpus."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.batch_size = 128 * 8
FLAGS.dtype = 'fp32'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu(self):
"""Benchmarks graph fp16 8 gpus."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_8_gpu')
    FLAGS.batch_size = 256 * 8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu_tweaked(self):
"""Benchmarks graph fp16 8 gpus tweaked."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_8_gpu_tweaked')
    FLAGS.batch_size = 256 * 8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = imagenet_main.run_imagenet(FLAGS)
wall_time_sec = time.time() - start_time_sec
print(stats)
# Remove values to skip triggering accuracy check.
del stats['eval_results']['accuracy']
del stats['eval_results']['accuracy_top_5']
self._report_benchmark(stats,
wall_time_sec)
class Resnet50EstimatorBenchmarkSynth(Resnet50EstimatorBenchmark):
"""Resnet50 synthetic benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
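    # Synthetic input data isolates model throughput from the input pipeline;
    # the run is capped at 110 steps for a short measurement.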
def_flags['use_synthetic_data'] = True
def_flags['max_train_steps'] = 110
def_flags['train_epochs'] = 1
super(Resnet50EstimatorBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet50EstimatorBenchmarkReal(Resnet50EstimatorBenchmark):
"""Resnet50 real data benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
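    # Same short, capped run as the synthetic variant, but reading real
    # ImageNet data from under root_data_dir.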
def_flags['data_dir'] = os.path.join(root_data_dir, IMAGENET_DATA_DIR_NAME)
def_flags['max_train_steps'] = 110
def_flags['train_epochs'] = 1
super(Resnet50EstimatorBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet56EstimatorAccuracy(EstimatorBenchmark):
"""Accuracy tests for Estimator ResNet56."""
  local_flags = None

@@ -47,57 +303,59 @@ class EstimatorCifar10BenchmarkTests(tf.test.Benchmark):
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    flag_methods = [cifar_main.define_cifar_flags]
    self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)
    super(Resnet56EstimatorAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def benchmark_graph_1_gpu(self):
    """Test layers model with Estimator and distribution strategies."""
    self._setup()
    flags.FLAGS.num_gpus = 1
    flags.FLAGS.data_dir = self.data_dir
    flags.FLAGS.batch_size = 128
    flags.FLAGS.train_epochs = 182
    flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    flags.FLAGS.resnet_size = 56
    flags.FLAGS.dtype = 'fp32'
    flags.FLAGS.hooks = ['ExamplesPerSecondHook']
    self._run_and_report_benchmark()

  def benchmark_graph_fp16_1_gpu(self):
    """Test layers FP16 model with Estimator and distribution strategies."""
    self._setup()
    flags.FLAGS.num_gpus = 1
    flags.FLAGS.data_dir = self.data_dir
    flags.FLAGS.batch_size = 128
    flags.FLAGS.train_epochs = 182
    flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_1_gpu')
    flags.FLAGS.resnet_size = 56
    flags.FLAGS.dtype = 'fp16'
    flags.FLAGS.hooks = ['ExamplesPerSecondHook']
    self._run_and_report_benchmark()

  def benchmark_graph_2_gpu(self):
    """Test layers model with Estimator and dist_strat. 2 GPUs."""
    self._setup()
    flags.FLAGS.num_gpus = 2
    flags.FLAGS.data_dir = self.data_dir
    flags.FLAGS.batch_size = 128
    flags.FLAGS.train_epochs = 182
    flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
    flags.FLAGS.resnet_size = 56
    flags.FLAGS.dtype = 'fp32'
    flags.FLAGS.hooks = ['ExamplesPerSecondHook']
    self._run_and_report_benchmark()

  def benchmark_graph_fp16_2_gpu(self):
    """Test layers FP16 model with Estimator and dist_strat. 2 GPUs."""
    self._setup()
    flags.FLAGS.num_gpus = 2
    flags.FLAGS.data_dir = self.data_dir
    flags.FLAGS.batch_size = 128
    flags.FLAGS.train_epochs = 182
    flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_2_gpu')
    flags.FLAGS.resnet_size = 56
    flags.FLAGS.dtype = 'fp16'
    flags.FLAGS.hooks = ['ExamplesPerSecondHook']

@@ -110,7 +368,7 @@ class EstimatorCifar10BenchmarkTests(tf.test.Benchmark):
    flags.FLAGS.data_dir = self.data_dir
    flags.FLAGS.batch_size = 128
    flags.FLAGS.train_epochs = 1
    flags.FLAGS.model_dir = self._get_model_dir('unit_test')
    flags.FLAGS.resnet_size = 8
    flags.FLAGS.dtype = 'fp32'
    flags.FLAGS.hooks = ['ExamplesPerSecondHook']
@@ -122,42 +380,7 @@ class EstimatorCifar10BenchmarkTests(tf.test.Benchmark):
    stats = cifar_main.run_cifar(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    self._report_benchmark(stats,
                           wall_time_sec,
                           top_1_min=0.926,
                           top_1_max=0.938)

[removed in this hunk: the class-local examples/sec reporting, _get_model_dir, and _setup helpers, which now live in the EstimatorBenchmark base class above]
@@ -360,15 +360,23 @@ def run_imagenet(flags_obj):
  Args:
    flags_obj: An object containing parsed flag values.

  Returns:
    Dict of results of the run. Contains the keys `eval_results` and
    `train_hooks`. `eval_results` contains accuracy (top_1) and
    accuracy_top_5. `train_hooks` is a list of the hook instances used
    during training.
  """
  input_function = (flags_obj.use_synthetic_data and
                    get_synth_input_fn(flags_core.get_tf_dtype(flags_obj)) or
                    input_fn)

  result = resnet_run_loop.resnet_main(
      flags_obj, imagenet_model_fn, input_function, DATASET_NAME,
      shape=[DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS])
  return result


def main(_):
  with logger.benchmark_context(flags.FLAGS):
...
@@ -525,8 +525,9 @@ def resnet_main(
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags_obj.export_dir is passed.

  Returns:
    Dict of results of the run. Contains the keys `eval_results` and
    `train_hooks`. `eval_results` contains accuracy (top_1) and accuracy_top_5.
    `train_hooks` is a list of the hook instances used during training.
  """
  model_helpers.apply_clean(flags.FLAGS)
...
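As a caller-side illustration of the return value documented above, here is a sketch of consuming the dict returned by run_imagenet, mirroring what _report_benchmark does (flag definition and parsing are assumed to have happened already, e.g. via define_imagenet_flags):

# Sketch only: assumes the ImageNet flags are already defined and parsed.
from absl import flags
from official.resnet import imagenet_main
from official.utils.logs import hooks

stats = imagenet_main.run_imagenet(flags.FLAGS)

eval_results = stats['eval_results']
print('top-1 accuracy:', eval_results['accuracy'].item())
print('top-5 accuracy:', eval_results['accuracy_top_5'].item())
print('global step:', eval_results['global_step'])

# train_hooks holds the hook instances used during training, e.g. the
# ExamplesPerSecondHook enabled with --hooks=ExamplesPerSecondHook.
for hook in stats['train_hooks']:
  if isinstance(hook, hooks.ExamplesPerSecondHook):
    rates = hook.current_examples_per_sec_list
    print('mean examples/sec:', sum(rates) / len(rates))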