Unverified Commit 97622ffc authored by Toby Boyd's avatar Toby Boyd Committed by GitHub
Browse files

[ResNet / NCF] Test force V1 path and allow V2 path as default (#7383)

* force_v2_in_keras_compile FLAG default to None and added separate temp path.

* switch to force testing v1 path, not force v2 path.

* Rename function force_v1_path.
parent c07abee7
......@@ -136,10 +136,10 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2_early_stop(self):
def benchmark_1_gpu_force_v1_path_early_stop(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_early_stop(self):
......@@ -148,11 +148,11 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_early_stop(self):
def benchmark_1_gpu_no_dist_strat_force_v1_path_early_stop(self):
self._setup()
FLAGS.distribution_strategy = 'off'
FLAGS.early_stopping = True
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
......@@ -168,11 +168,11 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_force_v2_early_stop(self):
def benchmark_xla_1_gpu_force_v1_path_early_stop(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.enable_xla = True
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_ctl_early_stop(self):
......@@ -194,13 +194,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.num_gpus = 2
self._run_and_report_benchmark()
def benchmark_2_gpus_early_stop_force_V2(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.num_gpus = 2
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_2_gpus_ctl_early_stop(self):
"""NCF with custom training loop. Works only in TF 2.0."""
self._setup()
......@@ -225,12 +218,12 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.train_epochs = 7
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_no_dist_strat_force_v2_mlperf_like(self):
def benchmark_1_gpu_no_dist_strat_force_v1_path_mlperf_like(self):
"""1 GPU using compile/fit without dist_strat."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
......@@ -281,8 +274,8 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.epsilon = 1e-8
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_force_v2_mlperf_like(self):
"""8 GPU using keras fit/compile V2 codepath."""
def benchmark_8_gpu_force_v1_path_mlperf_like(self):
"""8 GPU using keras fit/compile v1 codepath."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.train_epochs = 17
......@@ -291,7 +284,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.beta1 = 0.25
FLAGS.beta2 = 0.5
FLAGS.epsilon = 1e-8
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark_mlperf_like()
def benchmark_xla_8_gpu_mlperf_like(self):
......
......@@ -369,11 +369,17 @@ def run_ncf(_):
else:
with distribution_utils.get_strategy_scope(strategy):
keras_model.compile(
optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly,
experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if FLAGS.force_v2_in_keras_compile is not None:
keras_model.compile(
optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly,
experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
else:
keras_model.compile(
optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly)
history = keras_model.fit(
train_input_dataset,
......
......@@ -75,19 +75,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2(self):
"""Test keras based model with eager, DS, and force_v2 path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v2')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_cpu(self):
"""Test keras based model on CPU."""
self._setup()
......@@ -115,22 +102,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_force_v2(self):
"""Keras on CPU without dist strat but with force v2 in keras.compile."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_cpu_no_dist_strat_force_v2')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.data_format = 'channels_last'
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_run_eagerly(self):
"""Test keras based model on CPU w/forced eager and no dist_strat."""
self._setup()
......@@ -188,8 +159,8 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2(self):
"""No dist strat but forced v2 execution path."""
def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat forced v1 execution path."""
self._setup()
FLAGS.distribution_strategy = 'off'
FLAGS.num_gpus = 1
......@@ -197,26 +168,10 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v2')
'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2_run_eagerly(self):
"""No dist strat but forced v2 path via tf.compile path and force eager."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_v2_run_eagerly')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_2_gpu(self):
......@@ -299,15 +254,15 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2(self):
"""Test 1 gpu using forced v2 execution path."""
def benchmark_1_gpu_force_v1_path(self):
"""Test 1 gpu using forced v1 execution path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v1_path')
FLAGS.batch_size = 128
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_graph_1_gpu(self):
......@@ -353,31 +308,31 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2(self):
"""No dist strat but forced v2 execution path."""
def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat but forced v1 execution path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 128
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v2')
'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly(self):
"""Forced v2 execution path and forced eager."""
def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
"""Forced v1 execution path and forced eager."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 128
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly')
'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_2_gpu(self):
......@@ -444,17 +399,17 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_force_v2(self):
"""Test cpu without dist strat and force v2 in model.compile."""
def benchmark_cpu_no_dist_strat_force_v1_path(self):
"""Test cpu without dist strat and force v1 in model.compile."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_cpu_no_dist_strat_force_v2')
'benchmark_cpu_no_dist_strat_force_v1_path')
FLAGS.batch_size = 128
FLAGS.data_format = 'channels_last'
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_graph_cpu_no_dist_strat(self):
......
......@@ -153,13 +153,23 @@ def run(flags_obj):
optimizer = keras_common.get_optimizer()
model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)
model.compile(
loss='categorical_crossentropy',
optimizer=optimizer,
metrics=(['categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if flags_obj.force_v2_in_keras_compile is not None:
model.compile(
loss='categorical_crossentropy',
optimizer=optimizer,
metrics=(['categorical_crossentropy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
else:
model.compile(
loss='categorical_crossentropy',
optimizer=optimizer,
metrics=(['categorical_crossentropy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
callbacks = keras_common.get_callbacks(
learning_rate_schedule, cifar_preprocessing.NUM_IMAGES['train'])
......
......@@ -79,23 +79,6 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.use_tensor_lr = True
self._run_and_report_benchmark()
def benchmark_8_gpu_force_v2(self):
"""Test Keras model with eager, dist_strat, force v2 and 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_force_v2')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
# Add some thread tunings to improve performance.
FLAGS.datasets_num_private_threads = 14
FLAGS.use_tensor_lr = True
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_8_gpu_fp16(self):
"""Test Keras model with eager, dist_strat, 8 GPUs, and fp16."""
self._setup()
......@@ -294,8 +277,8 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 64
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly(self):
"""Forced v2 execution in tf.compile path and force eager."""
def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
"""Forced v1 execution in tf.compile path and force eager."""
self._setup()
FLAGS.num_gpus = 1
......@@ -303,13 +286,13 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_dist_strat_run_eagerly')
'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly_tweaked(self):
"""Forced v2 execution in tf.compile path and force eager."""
def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly_tweaked(self):
"""Forced v1 execution in tf.compile path and force eager."""
self._setup()
FLAGS.num_gpus = 1
......@@ -318,22 +301,22 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.explicit_gpu_placement = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_dist_strat_run_eagerly_tweaked')
'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly_tweaked')
FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2(self):
"""No dist strat but forced v2 execution tf.compile path."""
def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat but forced v1 execution tf.compile path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_dist_strat')
'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.batch_size = 128
FLAGS.force_v2_in_keras_compile = True
FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
......@@ -572,18 +555,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 * 8 # 8 GPUs
self._run_and_report_benchmark()
def benchmark_8_gpu_force_v2(self):
"""Test Keras model with 8 GPUs and v2 codepath."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_force_v2')
FLAGS.batch_size = 128 * 8 # 8 GPUs
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_8_gpu_tweaked(self):
"""Test Keras model with manual config tuning and 8 GPUs."""
self._setup()
......
......@@ -189,13 +189,23 @@ def run(flags_obj):
model = resnet_model.resnet50(
num_classes=imagenet_preprocessing.NUM_CLASSES, dtype=dtype)
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if flags_obj.force_v2_in_keras_compile is not None:
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
else:
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
callbacks = keras_common.get_callbacks(
learning_rate_schedule, imagenet_preprocessing.NUM_IMAGES['train'])
......
......@@ -282,7 +282,7 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
if force_v2_in_keras_compile:
flags.DEFINE_boolean(
name="force_v2_in_keras_compile", default=False,
name="force_v2_in_keras_compile", default=None,
help="Forces the use of run_distribued path even if not"
"using a `strategy`. This is not the same as"
"`tf.distribute.OneDeviceStrategy`")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment