Commit a1bd019e authored by Dong Lin, committed by Toby Boyd

Export benchmark stats using tf.test.Benchmark.report_benchmark() (#6103)

* Export benchmark stats using tf.test.Benchmark.report_benchmark()

* Fix python style using pyformat
parent ed6c805a
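
For context, the tf.test.Benchmark API this change adopts: subclassing it and calling self.report_benchmark() serializes the results to a TestResults protobuf, written under the path given by the TEST_REPORT_FILE_PREFIX environment variable when it is set. A minimal sketch with an illustrative class name and made-up metrics, not code from this commit:

import time
import tensorflow as tf

class SampleBenchmark(tf.test.Benchmark):
  """Illustrative subclass; not part of this commit."""

  def benchmark_example(self):
    start_time_sec = time.time()
    # ... run the workload being measured ...
    wall_time_sec = time.time() - start_time_sec
    self.report_benchmark(
        iters=110,  # made-up step count
        wall_time=wall_time_sec,
        extras={'examples_per_second': 1234.5})  # made-up throughput
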
......@@ -18,6 +18,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import time
import os
from absl import flags
......@@ -29,13 +31,12 @@ from official.resnet import cifar10_main as cifar_main
DATA_DIR = '/data/cifar10_data/cifar-10-batches-bin'
-class EstimatorCifar10BenchmarkTests(object):
class EstimatorCifar10BenchmarkTests(tf.test.Benchmark):
"""Benchmarks and accuracy tests for Estimator ResNet56."""
local_flags = None
def __init__(self, output_dir=None):
-self.oss_report_object = None
self.output_dir = output_dir
def resnet56_1_gpu(self):
......@@ -48,8 +49,7 @@ class EstimatorCifar10BenchmarkTests(object):
flags.FLAGS.model_dir = self._get_model_dir('resnet56_1_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp32'
-stats = cifar_main.run_cifar(flags.FLAGS)
-self._fill_report_object(stats)
self._run_and_report_benchmark()
def resnet56_fp16_1_gpu(self):
"""Test layers FP16 model with Estimator and distribution strategies."""
......@@ -61,8 +61,7 @@ class EstimatorCifar10BenchmarkTests(object):
flags.FLAGS.model_dir = self._get_model_dir('resnet56_fp16_1_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp16'
-stats = cifar_main.run_cifar(flags.FLAGS)
-self._fill_report_object(stats)
self._run_and_report_benchmark()
def resnet56_2_gpu(self):
"""Test layers model with Estimator and dist_strat. 2 GPUs."""
......@@ -74,8 +73,7 @@ class EstimatorCifar10BenchmarkTests(object):
flags.FLAGS.model_dir = self._get_model_dir('resnet56_2_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp32'
-stats = cifar_main.run_cifar(flags.FLAGS)
-self._fill_report_object(stats)
self._run_and_report_benchmark()
def resnet56_fp16_2_gpu(self):
"""Test layers FP16 model with Estimator and dist_strat. 2 GPUs."""
......@@ -87,16 +85,60 @@ class EstimatorCifar10BenchmarkTests(object):
flags.FLAGS.model_dir = self._get_model_dir('resnet56_fp16_2_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp16'
self._run_and_report_benchmark()
def unit_test(self):
"""A lightweigth test that can finish quickly"""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = DATA_DIR
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 1
flags.FLAGS.model_dir = self._get_model_dir('resnet56_1_gpu')
flags.FLAGS.resnet_size = 8
flags.FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = cifar_main.run_cifar(flags.FLAGS)
-self._fill_report_object(stats)
-def _fill_report_object(self, stats):
-# Also "available global_step"
-if self.oss_report_object:
-self.oss_report_object.top_1 = stats['accuracy'].item()
-self.oss_report_object.top_5 = stats['accuracy_top_5'].item()
-else:
-raise ValueError('oss_report_object has not been set.')
wall_time_sec = time.time() - start_time_sec
self.report_benchmark(
iters=stats['global_step'],
wall_time=wall_time_sec,
extras={
'accuracy':
self._json_description(stats['accuracy'].item(), priority=0),
'accuracy_top_5':
self._json_description(stats['accuracy_top_5'].item()),
})
def _json_description(self,
value,
priority=None,
min_value=None,
max_value=None):
"""Get a json-formatted string describing the attributes for a metric"""
attributes = {}
attributes['value'] = value
# Compare against None so that zero values (e.g. the priority=0 passed
# above) are still recorded; a bare truthiness check would drop them.
if priority is not None:
attributes['priority'] = priority
if min_value is not None:
attributes['min_value'] = min_value
if max_value is not None:
attributes['max_value'] = max_value
if min_value is not None or max_value is not None:
succeeded = True
if min_value is not None and value < min_value:
succeeded = False
if max_value is not None and value > max_value:
succeeded = False
attributes['succeeded'] = succeeded
return json.dumps(attributes)
def _get_model_dir(self, folder_name):
return os.path.join(self.output_dir, folder_name)
......
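
To make the helper's behavior concrete, here is a standalone copy of _json_description with an example call and its output. Values are made up, and the `is not None` checks (as fixed above) are assumed so that priority=0 survives:

import json

def json_description(value, priority=None, min_value=None, max_value=None):
  """Standalone copy of the helper above, for illustration only."""
  attributes = {'value': value}
  if priority is not None:
    attributes['priority'] = priority
  if min_value is not None:
    attributes['min_value'] = min_value
  if max_value is not None:
    attributes['max_value'] = max_value
  if min_value is not None or max_value is not None:
    succeeded = True
    if min_value is not None and value < min_value:
      succeeded = False
    if max_value is not None and value > max_value:
      succeeded = False
    attributes['succeeded'] = succeeded
  return json.dumps(attributes)

print(json_description(0.931, priority=0, min_value=0.926, max_value=0.938))
# {"value": 0.931, "priority": 0, "min_value": 0.926, "max_value": 0.938, "succeeded": true}
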
......@@ -19,6 +19,8 @@ from __future__ import division
from __future__ import print_function
import os
import time
import json
from absl import flags
from absl.testing import flagsaver
......@@ -27,12 +29,11 @@ import tensorflow as tf # pylint: disable=g-bad-import-order
FLAGS = flags.FLAGS
-class KerasBenchmark(object):
class KerasBenchmark(tf.test.Benchmark):
"""Base benchmark class with methods to simplify testing."""
local_flags = None
def __init__(self, output_dir=None, default_flags=None, flag_methods=None):
-self.oss_report_object = None
self.output_dir = output_dir
self.default_flags = default_flags or {}
self.flag_methods = flag_methods or {}
......@@ -56,41 +57,76 @@ class KerasBenchmark(object):
else:
flagsaver.restore_flag_values(KerasBenchmark.local_flags)
-def fill_report_object(self, stats, top_1_max=None, top_1_min=None,
-log_steps=None, total_batch_size=None, warmup=1):
-"""Fills report object to report results.
def _report_benchmark(self,
stats,
wall_time_sec,
top_1_max=None,
top_1_min=None,
log_steps=None,
total_batch_size=None,
warmup=1):
"""Report benchmark results by writing to local protobuf file
Args:
stats: dict returned from keras models with known entries.
wall_time_sec: the duration of the benchmark execution in seconds.
top_1_max: highest passing level for top_1 accuracy.
top_1_min: lowest passing level for top_1 accuracy.
log_steps: interval, in steps, between entries in stats['step_timestamp_log'].
total_batch_size: Global batch-size.
warmup: number of entries in stats['step_timestamp_log'] to ignore.
"""
-if self.oss_report_object:
extras = {}
if 'accuracy_top_1' in stats:
-self.oss_report_object.add_top_1(stats['accuracy_top_1'],
-expected_min=top_1_min,
-expected_max=top_1_max)
-self.oss_report_object.add_other_quality(
-stats['training_accuracy_top_1'],
-'top_1_train_accuracy')
-if (warmup and
-'step_timestamp_log' in stats and
extras['accuracy'] = self._json_description(
stats['accuracy_top_1'],
priority=0,
min_value=top_1_min,
max_value=top_1_max)
extras['top_1_train_accuracy'] = self._json_description(
stats['training_accuracy_top_1'], priority=1)
if (warmup and 'step_timestamp_log' in stats and
len(stats['step_timestamp_log']) > warmup):
# The first entry in time_log marks the start of step 1; each remaining
# entry marks the end of a recorded step.
time_log = stats['step_timestamp_log']
elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
-num_examples = (total_batch_size * log_steps * (len(time_log)-warmup-1))
num_examples = (
total_batch_size * log_steps * (len(time_log) - warmup - 1))
examples_per_sec = num_examples / elapsed
-self.oss_report_object.add_examples_per_second(examples_per_sec)
extras['exp_per_second'] = self._json_description(
examples_per_sec, priority=2)
if 'avg_exp_per_second' in stats:
-self.oss_report_object.add_result(stats['avg_exp_per_second'],
-'avg_exp_per_second',
-'exp_per_second')
-else:
-raise ValueError('oss_report_object has not been set.')
extras['avg_exp_per_second'] = self._json_description(
stats['avg_exp_per_second'], priority=3)
self.report_benchmark(iters=-1, wall_time=wall_time_sec, extras=extras)
def _json_description(self,
value,
priority=None,
min_value=None,
max_value=None):
"""Get a json-formatted string describing the attributes for a metric"""
attributes = {}
attributes['value'] = value
# Compare against None so that zero values (e.g. the priority=0 passed
# above) are still recorded; a bare truthiness check would drop them.
if priority is not None:
attributes['priority'] = priority
if min_value is not None:
attributes['min_value'] = min_value
if max_value is not None:
attributes['max_value'] = max_value
if min_value is not None or max_value is not None:
succeeded = True
if min_value is not None and value < min_value:
succeeded = False
if max_value is not None and value > max_value:
succeeded = False
attributes['succeeded'] = succeeded
return json.dumps(attributes)
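
To make the throughput arithmetic in _report_benchmark concrete: with a timestamp logged every log_steps steps and the first warmup entries discarded, len(time_log) - warmup - 1 logged intervals remain between time_log[warmup] and time_log[-1]. A worked sketch with made-up numbers (the real entries are objects with a .timestamp field; plain floats stand in for them here):

# Made-up numbers: batch size 128, a timestamp every 10 steps,
# 11 log entries, warmup of 1 (drop the first logged interval).
total_batch_size = 128
log_steps = 10
warmup = 1
timestamps = [0.0, 2.1, 4.0, 5.9, 7.8, 9.7, 11.6, 13.5, 15.4, 17.3, 19.2]

elapsed = timestamps[-1] - timestamps[warmup]  # 19.2 - 2.1 = 17.1 seconds
num_examples = total_batch_size * log_steps * (len(timestamps) - warmup - 1)
examples_per_sec = num_examples / elapsed      # 11520 / 17.1
print(round(examples_per_sec, 1))              # 673.7
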
......@@ -18,6 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from absl import flags
from official.resnet import cifar10_main as cifar_main
......@@ -36,11 +37,12 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
"""Accuracy tests for ResNet56 Keras CIFAR-10."""
def __init__(self, output_dir=None):
-flag_methods = [keras_common.define_keras_flags,
-cifar_main.define_cifar_flags]
flag_methods = [
keras_common.define_keras_flags, cifar_main.define_cifar_flags
]
-super(Resnet56KerasAccuracy, self).__init__(output_dir=output_dir,
-flag_methods=flag_methods)
super(Resnet56KerasAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def benchmark_graph_1_gpu(self):
"""Test keras based model with Keras fit and distribution strategies."""
......@@ -51,8 +53,7 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('keras_resnet56_1_gpu')
FLAGS.dtype = 'fp32'
-stats = keras_cifar_main.run(FLAGS)
-self.fill_report_object(stats, FLAGS.batch_size)
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
"""Test keras based model with eager and distribution strategies."""
......@@ -64,8 +65,7 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.model_dir = self._get_model_dir('keras_resnet56_eager_1_gpu')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
-stats = keras_cifar_main.run(flags.FLAGS)
-self.fill_report_object(stats, FLAGS.batch_size)
self._run_and_report_benchmark()
def benchmark_2_gpu(self):
"""Test keras based model with eager and distribution strategies."""
......@@ -77,8 +77,7 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.model_dir = self._get_model_dir('keras_resnet56_eager_2_gpu')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
-stats = keras_cifar_main.run(FLAGS)
-self.fill_report_object(stats, FLAGS.batch_size)
self._run_and_report_benchmark()
def benchmark_graph_2_gpu(self):
"""Test keras based model with Keras fit and distribution strategies."""
......@@ -89,8 +88,7 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('keras_resnet56_2_gpu')
FLAGS.dtype = 'fp32'
-stats = keras_cifar_main.run(FLAGS)
-self.fill_report_object(stats, FLAGS.batch_size)
self._run_and_report_benchmark()
def benchmark_graph_1_gpu_no_dist_strat(self):
"""Test keras based model with Keras fit but not distribution strategies."""
......@@ -100,18 +98,21 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.data_dir = DATA_DIR
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
-FLAGS.model_dir = self._get_model_dir(
-'keras_resnet56_no_dist_strat_1_gpu')
FLAGS.model_dir = self._get_model_dir('keras_resnet56_no_dist_strat_1_gpu')
FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = keras_cifar_main.run(FLAGS)
-self.fill_report_object(stats, FLAGS.batch_size)
wall_time_sec = time.time() - start_time_sec
-def fill_report_object(self, stats, total_batch_size):
-super(Resnet56KerasAccuracy, self).fill_report_object(
super(Resnet56KerasAccuracy, self)._report_benchmark(
stats,
wall_time_sec,
top_1_min=MIN_TOP_1_ACCURACY,
top_1_max=MAX_TOP_1_ACCURACY,
-total_batch_size=total_batch_size,
total_batch_size=FLAGS.batch_size,
log_steps=100)
......@@ -119,17 +120,25 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
"""Short performance tests for ResNet56 via Keras and CIFAR-10."""
def __init__(self, output_dir=None, default_flags=None):
-flag_methods = [keras_common.define_keras_flags,
-cifar_main.define_cifar_flags]
flag_methods = [
keras_common.define_keras_flags, cifar_main.define_cifar_flags
]
super(Resnet56KerasBenchmarkBase, self).__init__(
output_dir=output_dir,
flag_methods=flag_methods,
default_flags=default_flags)
-def _run_benchmark(self):
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = keras_cifar_main.run(FLAGS)
-self.fill_report_object(stats)
wall_time_sec = time.time() - start_time_sec
super(Resnet56KerasBenchmarkBase, self)._report_benchmark(
stats,
wall_time_sec,
total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_1_gpu_no_dist_strat(self):
self._setup()
......@@ -137,8 +146,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.turn_off_distribution_strategy = True
FLAGS.batch_size = 128
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_graph_1_gpu_no_dist_strat(self):
self._setup()
......@@ -146,8 +154,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = False
FLAGS.turn_off_distribution_strategy = True
FLAGS.batch_size = 128
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
self._setup()
......@@ -155,8 +162,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.turn_off_distribution_strategy = False
FLAGS.batch_size = 128
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_graph_1_gpu(self):
self._setup()
......@@ -164,8 +170,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = False
FLAGS.turn_off_distribution_strategy = False
FLAGS.batch_size = 128
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_2_gpu(self):
self._setup()
......@@ -173,8 +178,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.turn_off_distribution_strategy = False
FLAGS.batch_size = 128 * 2 # 2 GPUs
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_graph_2_gpu(self):
self._setup()
......@@ -182,14 +186,7 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = False
FLAGS.turn_off_distribution_strategy = False
FLAGS.batch_size = 128 * 2 # 2 GPUs
-self._run_benchmark()
-def fill_report_object(self, stats):
-super(Resnet56KerasBenchmarkBase, self).fill_report_object(
-stats,
-total_batch_size=FLAGS.batch_size,
-log_steps=FLAGS.log_steps)
self._run_and_report_benchmark()
class Resnet56KerasBenchmarkSynth(Resnet56KerasBenchmarkBase):
......@@ -202,8 +199,8 @@ class Resnet56KerasBenchmarkSynth(Resnet56KerasBenchmarkBase):
def_flags['train_steps'] = 110
def_flags['log_steps'] = 10
-super(Resnet56KerasBenchmarkSynth, self).__init__(output_dir=output_dir,
-default_flags=def_flags)
super(Resnet56KerasBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet56KerasBenchmarkReal(Resnet56KerasBenchmarkBase):
......@@ -216,5 +213,5 @@ class Resnet56KerasBenchmarkReal(Resnet56KerasBenchmarkBase):
def_flags['train_steps'] = 110
def_flags['log_steps'] = 10
-super(Resnet56KerasBenchmarkReal, self).__init__(output_dir=output_dir,
-default_flags=def_flags)
super(Resnet56KerasBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=def_flags)
......@@ -16,6 +16,7 @@
from __future__ import print_function
import os
import time
from absl import flags
......@@ -35,43 +36,47 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
"""Benchmark accuracy tests for ResNet50 in Keras."""
def __init__(self, output_dir=None):
-flag_methods = [keras_common.define_keras_flags,
-imagenet_main.define_imagenet_flags]
flag_methods = [
keras_common.define_keras_flags, imagenet_main.define_imagenet_flags
]
-super(Resnet50KerasAccuracy, self).__init__(output_dir=output_dir,
-flag_methods=flag_methods)
super(Resnet50KerasAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def benchmark_graph_8_gpu(self):
"""Test Keras model with Keras fit/dist_strat and 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = DATA_DIR
-FLAGS.batch_size = 128*8
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
FLAGS.model_dir = self._get_model_dir('keras_resnet50_8_gpu')
FLAGS.dtype = 'fp32'
-stats = keras_imagenet_main.run(FLAGS)
-self._fill_report_object(stats, FLAGS.batch_size)
self._run_and_report_benchmark()
def benchmark_8_gpu(self):
"""Test Keras model with eager, dist_strat and 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = DATA_DIR
-FLAGS.batch_size = 128*8
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
FLAGS.model_dir = self._get_model_dir('keras_resnet50_eager_8_gpu')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
-stats = keras_imagenet_main.run(FLAGS)
-self._fill_report_object(stats, FLAGS.batch_size)
self._run_and_report_benchmark()
-def fill_report_object(self, stats, total_batch_size):
-super(Resnet50KerasAccuracy, self).fill_report_object(
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = keras_imagenet_main.run(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
super(Resnet50KerasAccuracy, self)._report_benchmark(
stats,
wall_time_sec,
top_1_min=MIN_TOP_1_ACCURACY,
top_1_max=MAX_TOP_1_ACCURACY,
-total_batch_size=total_batch_size,
total_batch_size=FLAGS.batch_size,
log_steps=100)
def _get_model_dir(self, folder_name):
......@@ -82,17 +87,25 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
"""Resnet50 benchmarks."""
def __init__(self, output_dir=None, default_flags=None):
-flag_methods = [keras_common.define_keras_flags,
-imagenet_main.define_imagenet_flags]
flag_methods = [
keras_common.define_keras_flags, imagenet_main.define_imagenet_flags
]
super(Resnet50KerasBenchmarkBase, self).__init__(
output_dir=output_dir,
flag_methods=flag_methods,
default_flags=default_flags)
-def _run_benchmark(self):
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = keras_imagenet_main.run(FLAGS)
-self.fill_report_object(stats)
wall_time_sec = time.time() - start_time_sec
super(Resnet50KerasBenchmarkBase, self)._report_benchmark(
stats,
wall_time_sec,
total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_1_gpu_no_dist_strat(self):
self._setup()
......@@ -101,8 +114,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.turn_off_distribution_strategy = True
FLAGS.batch_size = 128
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_graph_1_gpu_no_dist_strat(self):
self._setup()
......@@ -111,8 +123,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = False
FLAGS.turn_off_distribution_strategy = True
FLAGS.batch_size = 128
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
self._setup()
......@@ -121,8 +132,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.turn_off_distribution_strategy = False
FLAGS.batch_size = 128
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_graph_1_gpu(self):
self._setup()
......@@ -131,8 +141,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = False
FLAGS.turn_off_distribution_strategy = False
FLAGS.batch_size = 128
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_8_gpu(self):
self._setup()
......@@ -141,8 +150,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.turn_off_distribution_strategy = False
FLAGS.batch_size = 128 * 8 # 8 GPUs
-self._run_benchmark()
self._run_and_report_benchmark()
def benchmark_graph_8_gpu(self):
self._setup()
......@@ -151,8 +159,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = False
FLAGS.turn_off_distribution_strategy = False
FLAGS.batch_size = 128 * 8 # 8 GPUs
-self._run_benchmark()
self._run_and_report_benchmark()
-def fill_report_object(self, stats):
-super(Resnet50KerasBenchmarkBase, self).fill_report_object(
......@@ -171,8 +178,8 @@ class Resnet50KerasBenchmarkSynth(Resnet50KerasBenchmarkBase):
def_flags['train_steps'] = 110
def_flags['log_steps'] = 10
-super(Resnet50KerasBenchmarkSynth, self).__init__(output_dir=output_dir,
-default_flags=def_flags)
super(Resnet50KerasBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
......@@ -185,5 +192,5 @@ class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
def_flags['train_steps'] = 110
def_flags['log_steps'] = 10
-super(Resnet50KerasBenchmarkReal, self).__init__(output_dir=output_dir,
-default_flags=def_flags)
super(Resnet50KerasBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=def_flags)
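
Finally, a hypothetical driver showing how one of these benchmark classes might be exercised directly; the environment variable is real tf.test.Benchmark behavior, but the paths and the choice of method are illustrative:

import os

# TEST_REPORT_FILE_PREFIX tells tf.test.Benchmark where to write the
# TestResults protobuf emitted by report_benchmark(). Paths are made up.
os.environ['TEST_REPORT_FILE_PREFIX'] = '/tmp/benchmark_reports/'
benchmark = Resnet50KerasBenchmarkReal(output_dir='/tmp/benchmark_models')
benchmark.benchmark_1_gpu()  # configures flags, runs the model, then reports
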