Unverified commit 9d8c9aa4, authored by Toby Boyd, committed by GitHub

Single execution path tests for ResNet50, ResNet56, NCF, and Shakespeare LSTM. (#7276)

* Add force_run_distributed tests.

* Added enable_eager

* Rename force_run_distributed to force_v2_in_keras_compile.

* Adding force_v2 tests and FLAGs.

* Rename method to avoid conflict.

* Add cpu force_v2 tests.

* fix lint, wrap line.

* change to force_v2_in_keras_compile

* Update method name.

* Lower mlperf target to 0.736.
parent 8390b362
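In outline, the change defines one new boolean flag and forwards it to each model's Keras `compile` call. A minimal, hypothetical sketch of that wiring (the model is a placeholder; `run_distributed` was an experimental `compile` argument in the TF 2.0-era builds this commit targets, later renamed and then removed):

# Hypothetical, trimmed wiring; the real flag lives in
# official/utils/flags/_performance.py and the real compile calls are in the
# per-model mains changed below.
from absl import app
from absl import flags
import tensorflow as tf

flags.DEFINE_boolean(
    name='force_v2_in_keras_compile', default=False,
    help='Force the single (v2) execution path in Keras compile even when '
         'no tf.distribute strategy is in use.')

FLAGS = flags.FLAGS


def main(_):
  # Placeholder model; the real benchmarks build ResNet50/56, NCF, or an LSTM.
  model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
  # run_distributed was an experimental compile() kwarg in TF 2.0-era builds;
  # current TF releases no longer accept it.
  model.compile(optimizer='sgd', loss='mse',
                run_distributed=FLAGS.force_v2_in_keras_compile)


if __name__ == '__main__':
  app.run(main)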
@@ -163,7 +163,8 @@ def define_ncf_flags():
       max_train_steps=False,
       dtype=False,
       all_reduce_alg=False,
-      enable_xla=True
+      enable_xla=True,
+      force_v2_in_keras_compile=True
   )
   flags_core.define_device(tpu=True)
   flags_core.define_benchmark()
@@ -122,12 +122,25 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()
 
+  def benchmark_1_gpu_force_v2_early_stop(self):
+    self._setup()
+    FLAGS.early_stopping = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_no_dist_strat_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()
 
+  def benchmark_1_gpu_no_dist_strat_force_v2_early_stop(self):
+    self._setup()
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.early_stopping = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
@@ -141,6 +154,13 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.enable_xla = True
     self._run_and_report_benchmark()
 
+  def benchmark_xla_1_gpu_force_v2_early_stop(self):
+    self._setup()
+    FLAGS.early_stopping = True
+    FLAGS.enable_xla = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_ctl_early_stop(self):
     self._setup()
     FLAGS.keras_use_ctl = True
@@ -184,6 +204,14 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.train_epochs = 7
     self._run_and_report_benchmark()
 
+  def benchmark_1_gpu_no_dist_strat_force_v2_mlperf_like(self):
+    """1 GPU using compile/fit without dist_strat."""
+    self._setup()
+    FLAGS.train_epochs = 7
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
     """1 GPU using compile/fit without dist_strat."""
     self._setup()
@@ -413,7 +413,8 @@ def run_ncf(_):
   with distribution_utils.get_strategy_scope(strategy):
     keras_model.compile(optimizer=optimizer,
-                        run_eagerly=FLAGS.run_eagerly)
+                        run_eagerly=FLAGS.run_eagerly,
+                        run_distributed=FLAGS.force_v2_in_keras_compile)
 
     history = keras_model.fit(train_input_dataset,
                               epochs=FLAGS.train_epochs,
@@ -25,7 +25,7 @@ import tensorflow as tf  # pylint: disable=g-bad-import-order
 from official.resnet.keras import keras_benchmark
 from official.resnet.keras import keras_cifar_main
 
-MIN_TOP_1_ACCURACY = 0.925
+MIN_TOP_1_ACCURACY = 0.929
 MAX_TOP_1_ACCURACY = 0.938
 
 FLAGS = flags.FLAGS
@@ -75,6 +75,19 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.enable_eager = True
     self._run_and_report_benchmark()
 
+  def benchmark_1_gpu_force_v2(self):
+    """Test keras based model with eager, dist_strat, and force_v2 path."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.data_dir = self.data_dir
+    FLAGS.batch_size = 128
+    FLAGS.train_epochs = 182
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v2')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_cpu(self):
     """Test keras based model on CPU."""
     self._setup()
@@ -102,6 +115,22 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()
 
+  def benchmark_cpu_no_dist_strat_force_v2(self):
+    """Keras on CPU without dist_strat but with force_v2 in keras compile."""
+    self._setup()
+    FLAGS.num_gpus = 0
+    FLAGS.data_dir = self.data_dir
+    FLAGS.batch_size = 128
+    FLAGS.train_epochs = 182
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_cpu_no_dist_strat_force_v2')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.data_format = 'channels_last'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_cpu_no_dist_strat_run_eagerly(self):
     """Test keras based model on CPU w/forced eager and no dist_strat."""
     self._setup()
@@ -147,38 +176,69 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
-  def benchmark_2_gpu(self):
-    """Test keras based model with eager and distribution strategies."""
+  def benchmark_graph_1_gpu_no_dist_strat(self):
+    """Test keras based model with Keras fit but no distribution strategies."""
     self._setup()
-    FLAGS.num_gpus = 2
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.num_gpus = 1
     FLAGS.data_dir = self.data_dir
     FLAGS.batch_size = 128
     FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
+    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
     FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
     self._run_and_report_benchmark()
 
+  def benchmark_1_gpu_no_dist_strat_force_v2(self):
+    """No dist strat but forced v2 execution path."""
+    self._setup()
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.num_gpus = 1
+    FLAGS.data_dir = self.data_dir
+    FLAGS.batch_size = 128
+    FLAGS.train_epochs = 182
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_no_dist_strat_force_v2')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
+  def benchmark_1_gpu_force_v2_run_eagerly(self):
+    """No dist strat but forced v2 path in keras compile and forced eager."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.data_dir = self.data_dir
+    FLAGS.batch_size = 128
+    FLAGS.train_epochs = 182
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_force_v2_run_eagerly')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.run_eagerly = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
-  def benchmark_graph_2_gpu(self):
-    """Test keras based model with Keras fit and distribution strategies."""
+  def benchmark_2_gpu(self):
+    """Test keras based model with eager and distribution strategies."""
     self._setup()
     FLAGS.num_gpus = 2
     FLAGS.data_dir = self.data_dir
     FLAGS.batch_size = 128
     FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
+    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
     FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
     self._run_and_report_benchmark()
 
-  def benchmark_graph_1_gpu_no_dist_strat(self):
-    """Test keras based model with Keras fit but no distribution strategies."""
+  def benchmark_graph_2_gpu(self):
+    """Test keras based model with Keras fit and distribution strategies."""
     self._setup()
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.num_gpus = 1
+    FLAGS.num_gpus = 2
     FLAGS.data_dir = self.data_dir
     FLAGS.batch_size = 128
     FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
+    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
     FLAGS.dtype = 'fp32'
     self._run_and_report_benchmark()
@@ -228,6 +288,17 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128
     self._run_and_report_benchmark()
 
+  def benchmark_1_gpu_force_v2(self):
+    """Test 1 gpu using the forced v2 execution path."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'default'
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v2')
+    FLAGS.batch_size = 128
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_graph_1_gpu(self):
     """Test 1 gpu graph."""
     self._setup()
@@ -271,6 +342,33 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()
 
+  def benchmark_1_gpu_no_dist_strat_force_v2(self):
+    """No dist strat but forced v2 execution path."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.batch_size = 128
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_no_dist_strat_force_v2')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
+  def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly(self):
+    """Forced v2 execution path and forced eager."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.batch_size = 128
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.run_eagerly = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_2_gpu(self):
     """Test 2 gpu."""
     self._setup()
@@ -335,6 +433,19 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()
 
+  def benchmark_cpu_no_dist_strat_force_v2(self):
+    """Test cpu without dist strat and with force_v2 in model.compile."""
+    self._setup()
+    FLAGS.num_gpus = 0
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_cpu_no_dist_strat_force_v2')
+    FLAGS.batch_size = 128
+    FLAGS.data_format = 'channels_last'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_graph_cpu_no_dist_strat(self):
     """Test cpu graph mode without distribution strategies."""
     self._setup()
@@ -181,7 +181,8 @@ def run(flags_obj):
       optimizer=optimizer,
       metrics=(['categorical_accuracy']
                if flags_obj.report_accuracy_metrics else None),
-      run_eagerly=flags_obj.run_eagerly)
+      run_eagerly=flags_obj.run_eagerly,
+      run_distributed=flags_obj.force_v2_in_keras_compile)
 
   callbacks = keras_common.get_callbacks(
       learning_rate_schedule, cifar_main.NUM_IMAGES['train'])
@@ -258,7 +258,8 @@ def define_keras_flags(dynamic_loss_scale=True):
       dynamic_loss_scale=dynamic_loss_scale,
       loss_scale=True,
       tf_data_experimental_slack=True,
-      enable_xla=True)
+      enable_xla=True,
+      force_v2_in_keras_compile=True)
 
   flags_core.define_image()
   flags_core.define_benchmark()
   flags.adopt_module_key_flags(flags_core)
@@ -136,7 +136,7 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.enable_xla = True
     FLAGS.use_tensor_lr = True
     FLAGS.tf_gpu_thread_mode = 'gpu_private'
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark(top_1_min=0.736)
 
   def benchmark_8_gpu_mlperf_like(self):
     """Test similar to the rules for MLPerf 0.5.

@@ -160,7 +160,7 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.dtype = 'fp16'
     FLAGS.enable_eager = True
     FLAGS.enable_xla = True
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark(top_1_min=0.736)
 
   def benchmark_xla_8_gpu_fp16_dynamic(self):
     """Test Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16."""

@@ -178,9 +178,11 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
     # Thread tuning to improve performance.
     FLAGS.tf_gpu_thread_mode = 'gpu_private'
     FLAGS.use_tensor_lr = True
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark(top_1_min=0.736)
 
-  def _run_and_report_benchmark(self):
+  def _run_and_report_benchmark(self,
+                                top_1_min=MIN_TOP_1_ACCURACY,
+                                top_1_max=MAX_TOP_1_ACCURACY):
     start_time_sec = time.time()
     stats = keras_imagenet_main.run(flags.FLAGS)
     wall_time_sec = time.time() - start_time_sec

@@ -188,8 +190,8 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
     super(Resnet50KerasAccuracy, self)._report_benchmark(
         stats,
         wall_time_sec,
-        top_1_min=MIN_TOP_1_ACCURACY,
-        top_1_max=MAX_TOP_1_ACCURACY,
+        top_1_min=top_1_min,
+        top_1_max=top_1_max,
         total_batch_size=FLAGS.batch_size,
         log_steps=100)
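Seen together, these hunks parameterize the accuracy bounds so the MLPerf-like benchmarks can lower only the top-1 floor to 0.736 while every other caller keeps the module-level constants. A runnable toy sketch of that default-argument pattern (all numbers here are placeholders, not the repo's real constants):

# Placeholder bounds; the real values live at module scope in
# keras_imagenet_benchmark.py.
MIN_TOP_1_ACCURACY = 0.760
MAX_TOP_1_ACCURACY = 0.770


def run_and_report_benchmark(top_1_min=MIN_TOP_1_ACCURACY,
                             top_1_max=MAX_TOP_1_ACCURACY):
  measured = 0.741  # stand-in for stats returned by keras_imagenet_main.run()
  ok = top_1_min <= measured <= top_1_max
  print('top_1=%.3f in [%.3f, %.3f]: %s' % (measured, top_1_min, top_1_max, ok))


run_and_report_benchmark()                 # strict default bounds: fails
run_and_report_benchmark(top_1_min=0.736)  # lowered MLPerf-like floor: passes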
@@ -261,6 +263,33 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 64
     self._run_and_report_benchmark()
 
+  def benchmark_1_gpu_force_dist_strat_run_eagerly(self):
+    """No dist strat but forced dist_strat path in keras compile and eager."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.enable_eager = True
+    FLAGS.run_eagerly = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_force_dist_strat_run_eagerly')
+    FLAGS.batch_size = 64
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
+  def benchmark_1_gpu_force_dist_strat(self):
+    """No dist strat but forced dist_strat path in keras compile."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_force_dist_strat')
+    FLAGS.batch_size = 128
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
     """Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
     self._setup()
@@ -205,7 +205,8 @@ def run(flags_obj):
       optimizer=optimizer,
       metrics=(['sparse_categorical_accuracy']
                if flags_obj.report_accuracy_metrics else None),
-      run_eagerly=flags_obj.run_eagerly)
+      run_eagerly=flags_obj.run_eagerly,
+      run_distributed=flags_obj.force_v2_in_keras_compile)
 
   callbacks = keras_common.get_callbacks(
       learning_rate_schedule, imagenet_main.NUM_IMAGES['train'])
@@ -148,7 +148,7 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
     self._run_and_report_benchmark()
 
   def benchmark_1_gpu_no_ds_run_eagerly(self):
-    """Benchmark 1 gpu."""
+    """Benchmark 1 gpu without distribution strategies."""
     self._setup()
     FLAGS.num_gpus = 1
     FLAGS.training_data = self.train_data
@@ -160,6 +160,19 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
     self._run_and_report_benchmark()
 
+  def benchmark_1_gpu_no_ds_force_v2(self):
+    """Benchmark 1 gpu, no dist strat, with force_v2 in keras compile."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.training_data = self.train_data
+    FLAGS.batch_size = 64
+    FLAGS.train_epochs = 43
+    FLAGS.model_dir = ''
+    FLAGS.force_v2_in_keras_compile = True
+    FLAGS.distribution_strategy = 'off'
+    self._run_and_report_benchmark()
+
   def benchmark_xla_1_gpu(self):
     """Benchmark 1 gpu w/xla."""
     self._setup()
@@ -55,7 +55,8 @@ def define_flags():
       synthetic_data=False,
       max_train_steps=False,
       dtype=False,
-      enable_xla=True)
+      enable_xla=True,
+      force_v2_in_keras_compile=True)
 
   flags_core.set_defaults(train_epochs=43,
                           batch_size=64)

@@ -166,7 +167,8 @@ def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
       metrics=[
           tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
           tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
-      run_eagerly=flags_obj.run_eagerly)
+      run_eagerly=flags_obj.run_eagerly,
+      run_distributed=flags_obj.force_v2_in_keras_compile)
 
   callbacks = []
   if checkpoint_dir:
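One note on the metrics compiled in above: with `top_k` set, `tf.keras.metrics.Recall` counts a true label as recovered only when that class ranks among the k highest-scoring classes for the example. A tiny self-contained check (toy values, not from the benchmark):

import tensorflow as tf

# RecallAt1: a true label counts as a hit only if it is the top-ranked class.
m = tf.keras.metrics.Recall(top_k=1, name='RecallAt1')
m.update_state([[0, 0, 1], [0, 1, 0]],              # one-hot labels
               [[0.1, 0.2, 0.7], [0.5, 0.3, 0.2]])  # predicted scores
print(m.result().numpy())  # 0.5: only one of the two true labels ranks first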
@@ -71,7 +71,8 @@ def define_transformer_flags():
       dtype=True,
       loss_scale=True,
       all_reduce_alg=True,
-      enable_xla=True
+      enable_xla=True,
+      force_v2_in_keras_compile=True
   )
 
   # Additional performance flags
@@ -61,7 +61,8 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
                        datasets_num_parallel_batches=False,
                        dynamic_loss_scale=False, fp16_implementation=False,
                        loss_scale=False,
-                       tf_data_experimental_slack=False, enable_xla=False):
+                       tf_data_experimental_slack=False, enable_xla=False,
+                       force_v2_in_keras_compile=False):
   """Register flags for specifying performance tuning arguments.
 
   Args:
@@ -87,6 +88,9 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
     tf_data_experimental_slack: Determines whether to enable tf.data's
       `experimental_slack` option.
     enable_xla: Determines if XLA (auto clustering) is turned on.
+    force_v2_in_keras_compile: Forces the use of the run_distributed path
+      even if not using a `strategy`. This is not the same as
+      `tf.distribute.OneDeviceStrategy`.
 
   Returns:
     A list of flags for core.py to mark as key flags.
@@ -276,4 +280,11 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
         name="enable_xla", default=False,
         help="Whether to enable XLA auto jit compilation")
 
+  if force_v2_in_keras_compile:
+    flags.DEFINE_boolean(
+        name="force_v2_in_keras_compile", default=False,
+        help="Forces the use of the run_distributed path even if not "
+             "using a `strategy`. This is not the same as "
+             "`tf.distribute.OneDeviceStrategy`.")
+
   return key_flags
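For readers unfamiliar with the repo's flag helpers, a trimmed, hypothetical stand-in for the opt-in registration pattern used above (`define_performance` returns a `key_flags` list for core.py; only models that pass `force_v2_in_keras_compile=True` ever register the flag):

# Minimal stand-in for the opt-in flag registration pattern; runnable with
# absl-py installed. The real helper registers many more flags.
from absl import flags


def define_performance(enable_xla=False, force_v2_in_keras_compile=False):
  """Registers only the performance flags a given model opts into."""
  key_flags = []

  if enable_xla:
    flags.DEFINE_boolean(
        name='enable_xla', default=False,
        help='Whether to enable XLA auto jit compilation.')
    key_flags.append('enable_xla')

  if force_v2_in_keras_compile:
    flags.DEFINE_boolean(
        name='force_v2_in_keras_compile', default=False,
        help='Forces the use of the run_distributed path even if not '
             'using a `strategy`. This is not the same as '
             '`tf.distribute.OneDeviceStrategy`.')
    key_flags.append('force_v2_in_keras_compile')

  return key_flags


# Each model's define_flags() opts in, so unrelated binaries never see the flag.
define_performance(enable_xla=True, force_v2_in_keras_compile=True)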