"cacheflow/core/scheduler.py" did not exist on "fdd0f2f4723afd8c45edacf9357c5a8973767da6"
Unverified commit 9d8c9aa4 authored by Toby Boyd, committed by GitHub

Single execution path tests for ResNet50, ResNet56, NCF, and Shakespeare LSTM. (#7276)

* Add force_run_distributed tests.

* Add enable_eager.

* Rename force_run_distributed to force_v2_in_keras_compile.

* Add force_v2 tests and FLAGS.

* Rename method to avoid conflict.

* Add CPU force_v2 tests.

* Fix lint, wrap line.

* Change to force_v2_in_keras_compile.

* Update method name.

* Lower MLPerf target to 0.736.
parent 8390b362
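All four models get the same wiring: a new boolean flag, force_v2_in_keras_compile, is registered through flags_core.define_performance and forwarded to the experimental run_distributed argument of Keras Model.compile. A minimal sketch of the pattern, assuming an absl-style FLAGS object like the one these scripts share (the model, optimizer, and loss below are illustrative, not from this commit):

from absl import flags

FLAGS = flags.FLAGS

def compile_for_benchmark(model, optimizer):
  # run_eagerly forces op-by-op execution; run_distributed opts in to the
  # v2 execution path in compile/fit even when no tf.distribute strategy
  # is in scope. run_distributed was an experimental kwarg in the
  # tf-nightly builds this commit targeted and is not part of stable TF.
  model.compile(optimizer=optimizer,
                loss='sparse_categorical_crossentropy',
                run_eagerly=FLAGS.run_eagerly,
                run_distributed=FLAGS.force_v2_in_keras_compile)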
@@ -163,7 +163,8 @@ def define_ncf_flags():
       max_train_steps=False,
       dtype=False,
       all_reduce_alg=False,
-      enable_xla=True
+      enable_xla=True,
+      force_v2_in_keras_compile=True
   )
   flags_core.define_device(tpu=True)
   flags_core.define_benchmark()
@@ -122,12 +122,25 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_force_v2_early_stop(self):
+    self._setup()
+    FLAGS.early_stopping = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_no_dist_strat_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
     FLAGS.early_stopping = True
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_no_dist_strat_force_v2_early_stop(self):
+    self._setup()
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.early_stopping = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
     self._setup()
     FLAGS.distribution_strategy = 'off'
@@ -141,6 +154,13 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.enable_xla = True
     self._run_and_report_benchmark()

+  def benchmark_xla_1_gpu_force_v2_early_stop(self):
+    self._setup()
+    FLAGS.early_stopping = True
+    FLAGS.enable_xla = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_ctl_early_stop(self):
     self._setup()
     FLAGS.keras_use_ctl = True
@@ -184,6 +204,14 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
     FLAGS.train_epochs = 7
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_no_dist_strat_force_v2_mlperf_like(self):
+    """1 GPU using compile/fit without dist_strat."""
+    self._setup()
+    FLAGS.train_epochs = 7
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
     """1 GPU using compile/fit without dist_strat."""
     self._setup()
@@ -413,7 +413,8 @@ def run_ncf(_):
   with distribution_utils.get_strategy_scope(strategy):
     keras_model.compile(optimizer=optimizer,
-                        run_eagerly=FLAGS.run_eagerly)
+                        run_eagerly=FLAGS.run_eagerly,
+                        run_distributed=FLAGS.force_v2_in_keras_compile)

   history = keras_model.fit(train_input_dataset,
                             epochs=FLAGS.train_epochs,
@@ -25,7 +25,7 @@ import tensorflow as tf  # pylint: disable=g-bad-import-order
 from official.resnet.keras import keras_benchmark
 from official.resnet.keras import keras_cifar_main

-MIN_TOP_1_ACCURACY = 0.925
+MIN_TOP_1_ACCURACY = 0.929
 MAX_TOP_1_ACCURACY = 0.938

 FLAGS = flags.FLAGS
@@ -75,6 +75,19 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.enable_eager = True
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_force_v2(self):
+    """Test keras based model with eager, DS, and force_v2 path."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.data_dir = self.data_dir
+    FLAGS.batch_size = 128
+    FLAGS.train_epochs = 182
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v2')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_cpu(self):
     """Test keras based model on CPU."""
     self._setup()
@@ -102,6 +115,22 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()

+  def benchmark_cpu_no_dist_strat_force_v2(self):
+    """Keras on CPU without dist strat but with force v2 in keras.compile."""
+    self._setup()
+    FLAGS.num_gpus = 0
+    FLAGS.data_dir = self.data_dir
+    FLAGS.batch_size = 128
+    FLAGS.train_epochs = 182
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_cpu_no_dist_strat_force_v2')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.data_format = 'channels_last'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_cpu_no_dist_strat_run_eagerly(self):
     """Test keras based model on CPU w/forced eager and no dist_strat."""
     self._setup()
@@ -147,38 +176,69 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()

-  def benchmark_2_gpu(self):
-    """Test keras based model with eager and distribution strategies."""
+  def benchmark_graph_1_gpu_no_dist_strat(self):
+    """Test keras based model with Keras fit but not distribution strategies."""
     self._setup()
-    FLAGS.num_gpus = 2
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.num_gpus = 1
     FLAGS.data_dir = self.data_dir
     FLAGS.batch_size = 128
     FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
+    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
     FLAGS.dtype = 'fp32'
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_no_dist_strat_force_v2(self):
+    """No dist strat but forced v2 execution path."""
+    self._setup()
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.num_gpus = 1
+    FLAGS.data_dir = self.data_dir
+    FLAGS.batch_size = 128
+    FLAGS.train_epochs = 182
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_no_dist_strat_force_v2')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
-  def benchmark_graph_2_gpu(self):
-    """Test keras based model with Keras fit and distribution strategies."""
+  def benchmark_1_gpu_force_v2_run_eagerly(self):
+    """No dist strat but forced v2 path via tf.compile path and force eager."""
     self._setup()
+    FLAGS.num_gpus = 1
     FLAGS.data_dir = self.data_dir
     FLAGS.batch_size = 128
     FLAGS.train_epochs = 182
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_force_v2_run_eagerly')
     FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.run_eagerly = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
+  def benchmark_2_gpu(self):
+    """Test keras based model with eager and distribution strategies."""
+    self._setup()
+    FLAGS.num_gpus = 2
+    FLAGS.data_dir = self.data_dir
+    FLAGS.batch_size = 128
+    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
+    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
     FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
     self._run_and_report_benchmark()

-  def benchmark_graph_1_gpu_no_dist_strat(self):
-    """Test keras based model with Keras fit but not distribution strategies."""
+  def benchmark_graph_2_gpu(self):
+    """Test keras based model with Keras fit and distribution strategies."""
     self._setup()
-    FLAGS.distribution_strategy = 'off'
-    FLAGS.num_gpus = 1
+    FLAGS.num_gpus = 2
     FLAGS.data_dir = self.data_dir
     FLAGS.batch_size = 128
     FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
+    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
     FLAGS.dtype = 'fp32'
     self._run_and_report_benchmark()
@@ -228,6 +288,17 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_force_v2(self):
+    """Test 1 gpu using forced v2 execution path."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'default'
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
+    FLAGS.batch_size = 128
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_graph_1_gpu(self):
     """Test 1 gpu graph."""
     self._setup()
@@ -271,6 +342,33 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.distribution_strategy = 'off'
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_no_dist_strat_force_v2(self):
+    """No dist strat but forced v2 execution path."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.batch_size = 128
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_no_dist_strat_force_v2')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
+  def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly(self):
+    """Forced v2 execution path and forced eager."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.batch_size = 128
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly')
+    FLAGS.dtype = 'fp32'
+    FLAGS.enable_eager = True
+    FLAGS.run_eagerly = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_2_gpu(self):
     """Test 2 gpu."""
     self._setup()
@@ -335,6 +433,19 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()

+  def benchmark_cpu_no_dist_strat_force_v2(self):
+    """Test cpu without dist strat and force v2 in model.compile."""
+    self._setup()
+    FLAGS.num_gpus = 0
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_cpu_no_dist_strat_force_v2')
+    FLAGS.batch_size = 128
+    FLAGS.data_format = 'channels_last'
+    FLAGS.force_v2_in_keras_compile = True
+    self._run_and_report_benchmark()
+
   def benchmark_graph_cpu_no_dist_strat(self):
     """Test cpu graph mode without distribution strategies."""
     self._setup()
@@ -181,7 +181,8 @@ def run(flags_obj):
       optimizer=optimizer,
       metrics=(['categorical_accuracy']
                if flags_obj.report_accuracy_metrics else None),
-      run_eagerly=flags_obj.run_eagerly)
+      run_eagerly=flags_obj.run_eagerly,
+      run_distributed=flags_obj.force_v2_in_keras_compile)

   callbacks = keras_common.get_callbacks(
       learning_rate_schedule, cifar_main.NUM_IMAGES['train'])
@@ -258,7 +258,8 @@ def define_keras_flags(dynamic_loss_scale=True):
       dynamic_loss_scale=dynamic_loss_scale,
       loss_scale=True,
       tf_data_experimental_slack=True,
-      enable_xla=True)
+      enable_xla=True,
+      force_v2_in_keras_compile=True)
   flags_core.define_image()
   flags_core.define_benchmark()
   flags.adopt_module_key_flags(flags_core)
@@ -136,7 +136,7 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.enable_xla = True
     FLAGS.use_tensor_lr = True
     FLAGS.tf_gpu_thread_mode = 'gpu_private'
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark(top_1_min=0.736)

   def benchmark_8_gpu_mlperf_like(self):
     """Test similar to the rules for MLPerf 0.5.
@@ -160,7 +160,7 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.dtype = 'fp16'
     FLAGS.enable_eager = True
     FLAGS.enable_xla = True
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark(top_1_min=0.736)

   def benchmark_xla_8_gpu_fp16_dynamic(self):
     """Test Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16."""
@@ -178,9 +178,11 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
     # Thread tuning to improve performance.
     FLAGS.tf_gpu_thread_mode = 'gpu_private'
     FLAGS.use_tensor_lr = True
-    self._run_and_report_benchmark()
+    self._run_and_report_benchmark(top_1_min=0.736)

-  def _run_and_report_benchmark(self):
+  def _run_and_report_benchmark(self,
+                                top_1_min=MIN_TOP_1_ACCURACY,
+                                top_1_max=MAX_TOP_1_ACCURACY):
     start_time_sec = time.time()
     stats = keras_imagenet_main.run(flags.FLAGS)
     wall_time_sec = time.time() - start_time_sec
@@ -188,8 +190,8 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
     super(Resnet50KerasAccuracy, self)._report_benchmark(
         stats,
         wall_time_sec,
-        top_1_min=MIN_TOP_1_ACCURACY,
-        top_1_max=MAX_TOP_1_ACCURACY,
+        top_1_min=top_1_min,
+        top_1_max=top_1_max,
         total_batch_size=FLAGS.batch_size,
         log_steps=100)
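Making the accuracy bounds parameters with the module-level constants as defaults keeps every existing caller unchanged while letting individual tests relax a bound; per the commit message, the MLPerf-like tests lower the top-1 target to 0.736. A usage sketch:

# Unchanged callers keep the strict module-wide bounds:
self._run_and_report_benchmark()
# MLPerf-like tests relax only the lower bound:
self._run_and_report_benchmark(top_1_min=0.736)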
@@ -261,6 +263,33 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 64
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_force_dist_strat_run_eagerly(self):
+    """No dist strat but forced ds tf.compile path and force eager."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.enable_eager = True
+    FLAGS.run_eagerly = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_force_dist_strat_run_eagerly')
+    FLAGS.batch_size = 64
+    FLAGS.force_run_distributed = True
+    self._run_and_report_benchmark()
+
+  def benchmark_1_gpu_force_dist_strat(self):
+    """No dist strat but forced ds tf.compile path."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_force_dist_strat')
+    FLAGS.batch_size = 128
+    FLAGS.force_run_distributed = True
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
     """Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
     self._setup()
@@ -205,7 +205,8 @@ def run(flags_obj):
       optimizer=optimizer,
       metrics=(['sparse_categorical_accuracy']
                if flags_obj.report_accuracy_metrics else None),
-      run_eagerly=flags_obj.run_eagerly)
+      run_eagerly=flags_obj.run_eagerly,
+      run_distributed=flags_obj.force_v2_in_keras_compile)

   callbacks = keras_common.get_callbacks(
       learning_rate_schedule, imagenet_main.NUM_IMAGES['train'])
@@ -148,7 +148,7 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
     self._run_and_report_benchmark()

   def benchmark_1_gpu_no_ds_run_eagerly(self):
-    """Benchmark 1 gpu."""
+    """Benchmark 1 gpu without distribution strategies."""
     self._setup()
     FLAGS.num_gpus = 1
     FLAGS.training_data = self.train_data
@@ -160,6 +160,19 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_no_ds_force_v2(self):
+    """Benchmark 1 gpu no ds with force_v2 in keras.compile."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.training_data = self.train_data
+    FLAGS.batch_size = 64
+    FLAGS.train_epochs = 43
+    FLAGS.model_dir = ''
+    FLAGS.force_v2_in_keras_compile = True
+    FLAGS.distribution_strategy = 'off'
+    self._run_and_report_benchmark()
+
   def benchmark_xla_1_gpu(self):
     """Benchmark 1 gpu w/xla."""
     self._setup()
@@ -55,7 +55,8 @@ def define_flags():
       synthetic_data=False,
       max_train_steps=False,
       dtype=False,
-      enable_xla=True)
+      enable_xla=True,
+      force_v2_in_keras_compile=True)
   flags_core.set_defaults(train_epochs=43,
                           batch_size=64)
@@ -166,7 +167,8 @@ def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
       metrics=[
           tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
           tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
-      run_eagerly=flags_obj.run_eagerly)
+      run_eagerly=flags_obj.run_eagerly,
+      run_distributed=flags_obj.force_v2_in_keras_compile)

   callbacks = []
   if checkpoint_dir:
@@ -71,7 +71,8 @@ def define_transformer_flags():
       dtype=True,
       loss_scale=True,
       all_reduce_alg=True,
-      enable_xla=True
+      enable_xla=True,
+      force_v2_in_keras_compile=True
   )

   # Additional performance flags
@@ -61,7 +61,8 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
                        datasets_num_parallel_batches=False,
                        dynamic_loss_scale=False, fp16_implementation=False,
                        loss_scale=False,
-                       tf_data_experimental_slack=False, enable_xla=False):
+                       tf_data_experimental_slack=False, enable_xla=False,
+                       force_v2_in_keras_compile=False):
  """Register flags for specifying performance tuning arguments.

  Args:
@@ -87,6 +88,9 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
    tf_data_experimental_slack: Determines whether to enable tf.data's
      `experimental_slack` option.
    enable_xla: Determines if XLA (auto clustering) is turned on.
+    force_v2_in_keras_compile: Forces the use of the run_distributed path
+      even if not using a `strategy`. This is not the same as
+      `tf.distribute.OneDeviceStrategy`.

  Returns:
    A list of flags for core.py to mark as key flags.
@@ -276,4 +280,11 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
        name="enable_xla", default=False,
        help="Whether to enable XLA auto jit compilation")

+  if force_v2_in_keras_compile:
+    flags.DEFINE_boolean(
+        name="force_v2_in_keras_compile", default=False,
+        help="Forces the use of the run_distributed path even if not "
+             "using a `strategy`. This is not the same as "
+             "`tf.distribute.OneDeviceStrategy`.")
+
  return key_flags
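End to end, the flag registration above meets the compile calls earlier in the diff inside each model's main function. A self-contained sketch of that wiring under the same assumptions (toy model; run_distributed only existed as an experimental compile kwarg in the tf-nightly builds of this era, so this will not run on later TF releases):

from absl import app, flags
import tensorflow as tf

flags.DEFINE_boolean(
    name='force_v2_in_keras_compile', default=False,
    help='Force the run_distributed path even when no strategy is used.')
FLAGS = flags.FLAGS

def main(_):
  # Toy model standing in for ResNet/NCF/LSTM; the forwarding pattern is
  # identical in each of the real main scripts touched by this commit.
  model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
  model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                run_distributed=FLAGS.force_v2_in_keras_compile)

if __name__ == '__main__':
  app.run(main)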