Unverified Commit 97622ffc authored by Toby Boyd, committed by GitHub
Browse files

[ResNet / NCF] Test force V1 path and allow V2 path as default (#7383)

* force_v2_in_keras_compile FLAG defaults to None and added separate temp path.

* Switch to force-testing the v1 path instead of force-testing the v2 path.

* Rename function force_v1_path.
parent c07abee7
...@@ -136,10 +136,10 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase): ...@@ -136,10 +136,10 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.early_stopping = True FLAGS.early_stopping = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2_early_stop(self): def benchmark_1_gpu_force_v1_path_early_stop(self):
self._setup() self._setup()
FLAGS.early_stopping = True FLAGS.early_stopping = True
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_early_stop(self): def benchmark_1_gpu_no_dist_strat_early_stop(self):
...@@ -148,11 +148,11 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase): ...@@ -148,11 +148,11 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.early_stopping = True FLAGS.early_stopping = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_early_stop(self): def benchmark_1_gpu_no_dist_strat_force_v1_path_early_stop(self):
self._setup() self._setup()
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
FLAGS.early_stopping = True FLAGS.early_stopping = True
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self): def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
...@@ -168,11 +168,11 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase): ...@@ -168,11 +168,11 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.enable_xla = True FLAGS.enable_xla = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_xla_1_gpu_force_v2_early_stop(self): def benchmark_xla_1_gpu_force_v1_path_early_stop(self):
self._setup() self._setup()
FLAGS.early_stopping = True FLAGS.early_stopping = True
FLAGS.enable_xla = True FLAGS.enable_xla = True
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_ctl_early_stop(self): def benchmark_1_gpu_ctl_early_stop(self):
...@@ -194,13 +194,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase): ...@@ -194,13 +194,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.num_gpus = 2 FLAGS.num_gpus = 2
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_2_gpus_early_stop_force_V2(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.num_gpus = 2
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_2_gpus_ctl_early_stop(self): def benchmark_2_gpus_ctl_early_stop(self):
"""NCF with custom training loop. Works only in TF 2.0.""" """NCF with custom training loop. Works only in TF 2.0."""
self._setup() self._setup()
...@@ -225,12 +218,12 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase): ...@@ -225,12 +218,12 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.train_epochs = 7 FLAGS.train_epochs = 7
self._run_and_report_benchmark_mlperf_like() self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_no_dist_strat_force_v2_mlperf_like(self): def benchmark_1_gpu_no_dist_strat_force_v1_path_mlperf_like(self):
"""1 GPU using compile/fit without dist_strat.""" """1 GPU using compile/fit without dist_strat."""
self._setup() self._setup()
FLAGS.train_epochs = 7 FLAGS.train_epochs = 7
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_mlperf_like(self): def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
...@@ -281,8 +274,8 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase): ...@@ -281,8 +274,8 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.epsilon = 1e-8 FLAGS.epsilon = 1e-8
self._run_and_report_benchmark_mlperf_like() self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_force_v2_mlperf_like(self): def benchmark_8_gpu_force_v1_path_mlperf_like(self):
"""8 GPU using keras fit/compile V2 codepath.""" """8 GPU using keras fit/compile v1 codepath."""
self._setup() self._setup()
FLAGS.num_gpus = 8 FLAGS.num_gpus = 8
FLAGS.train_epochs = 17 FLAGS.train_epochs = 17
...@@ -291,7 +284,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase): ...@@ -291,7 +284,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.beta1 = 0.25 FLAGS.beta1 = 0.25
FLAGS.beta2 = 0.5 FLAGS.beta2 = 0.5
FLAGS.epsilon = 1e-8 FLAGS.epsilon = 1e-8
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark_mlperf_like() self._run_and_report_benchmark_mlperf_like()
def benchmark_xla_8_gpu_mlperf_like(self): def benchmark_xla_8_gpu_mlperf_like(self):
......
...@@ -369,11 +369,17 @@ def run_ncf(_): ...@@ -369,11 +369,17 @@ def run_ncf(_):
else: else:
with distribution_utils.get_strategy_scope(strategy): with distribution_utils.get_strategy_scope(strategy):
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if FLAGS.force_v2_in_keras_compile is not None:
keras_model.compile( keras_model.compile(
optimizer=optimizer, optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly, run_eagerly=FLAGS.run_eagerly,
experimental_run_tf_function=FLAGS.force_v2_in_keras_compile) experimental_run_tf_function=FLAGS.force_v2_in_keras_compile)
else:
keras_model.compile(
optimizer=optimizer,
run_eagerly=FLAGS.run_eagerly)
history = keras_model.fit( history = keras_model.fit(
train_input_dataset, train_input_dataset,
......
...@@ -75,19 +75,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark): ...@@ -75,19 +75,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True FLAGS.enable_eager = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2(self):
"""Test keras based model with eager, DS, and force_v2 path."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v2')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_cpu(self): def benchmark_cpu(self):
"""Test keras based model on CPU.""" """Test keras based model on CPU."""
self._setup() self._setup()
...@@ -115,22 +102,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark): ...@@ -115,22 +102,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.data_format = 'channels_last' FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_force_v2(self):
"""Keras on CPU without dist strat but with force v2 in keras.compile."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_cpu_no_dist_strat_force_v2')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off'
FLAGS.data_format = 'channels_last'
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_run_eagerly(self): def benchmark_cpu_no_dist_strat_run_eagerly(self):
"""Test keras based model on CPU w/forced eager and no dist_strat.""" """Test keras based model on CPU w/forced eager and no dist_strat."""
self._setup() self._setup()
...@@ -188,8 +159,8 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark): ...@@ -188,8 +159,8 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.dtype = 'fp32' FLAGS.dtype = 'fp32'
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2(self): def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat but forced v2 execution path.""" """No dist strat forced v1 execution path."""
self._setup() self._setup()
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
FLAGS.num_gpus = 1 FLAGS.num_gpus = 1
...@@ -197,26 +168,10 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark): ...@@ -197,26 +168,10 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 FLAGS.batch_size = 128
FLAGS.train_epochs = 182 FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir( FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v2') 'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.dtype = 'fp32' FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True FLAGS.enable_eager = True
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2_run_eagerly(self):
"""No dist strat but forced v2 path via tf.compile path and force eager."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128
FLAGS.train_epochs = 182
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_v2_run_eagerly')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_2_gpu(self): def benchmark_2_gpu(self):
...@@ -299,15 +254,15 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -299,15 +254,15 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 FLAGS.batch_size = 128
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_force_v2(self): def benchmark_1_gpu_force_v1_path(self):
"""Test 1 gpu using forced v2 execution path.""" """Test 1 gpu using forced v1 execution path."""
self._setup() self._setup()
FLAGS.num_gpus = 1 FLAGS.num_gpus = 1
FLAGS.enable_eager = True FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'default' FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu') FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v1_path')
FLAGS.batch_size = 128 FLAGS.batch_size = 128
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_graph_1_gpu(self): def benchmark_graph_1_gpu(self):
...@@ -353,31 +308,31 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -353,31 +308,31 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2(self): def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat but forced v2 execution path.""" """No dist strat but forced v1 execution path."""
self._setup() self._setup()
FLAGS.num_gpus = 1 FLAGS.num_gpus = 1
FLAGS.batch_size = 128 FLAGS.batch_size = 128
FLAGS.model_dir = self._get_model_dir( FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v2') 'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.dtype = 'fp32' FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly(self): def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
"""Forced v2 execution path and forced eager.""" """Forced v1 execution path and forced eager."""
self._setup() self._setup()
FLAGS.num_gpus = 1 FLAGS.num_gpus = 1
FLAGS.batch_size = 128 FLAGS.batch_size = 128
FLAGS.model_dir = self._get_model_dir( FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly') 'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
FLAGS.dtype = 'fp32' FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True FLAGS.enable_eager = True
FLAGS.run_eagerly = True FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_2_gpu(self): def benchmark_2_gpu(self):
...@@ -444,17 +399,17 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -444,17 +399,17 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.data_format = 'channels_last' FLAGS.data_format = 'channels_last'
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_cpu_no_dist_strat_force_v2(self): def benchmark_cpu_no_dist_strat_force_v1_path(self):
"""Test cpu without dist strat and force v2 in model.compile.""" """Test cpu without dist strat and force v1 in model.compile."""
self._setup() self._setup()
FLAGS.num_gpus = 0 FLAGS.num_gpus = 0
FLAGS.enable_eager = True FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir( FLAGS.model_dir = self._get_model_dir(
'benchmark_cpu_no_dist_strat_force_v2') 'benchmark_cpu_no_dist_strat_force_v1_path')
FLAGS.batch_size = 128 FLAGS.batch_size = 128
FLAGS.data_format = 'channels_last' FLAGS.data_format = 'channels_last'
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_graph_cpu_no_dist_strat(self): def benchmark_graph_cpu_no_dist_strat(self):
......
...@@ -153,13 +153,23 @@ def run(flags_obj): ...@@ -153,13 +153,23 @@ def run(flags_obj):
optimizer = keras_common.get_optimizer() optimizer = keras_common.get_optimizer()
model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES) model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if flags_obj.force_v2_in_keras_compile is not None:
model.compile( model.compile(
loss='categorical_crossentropy', loss='categorical_crossentropy',
optimizer=optimizer, optimizer=optimizer,
metrics=(['categorical_accuracy'] metrics=(['categorical_accuracy']
if flags_obj.report_accuracy_metrics else None), if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly, run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile) experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
else:
model.compile(
loss='categorical_crossentropy',
optimizer=optimizer,
metrics=(['categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
callbacks = keras_common.get_callbacks( callbacks = keras_common.get_callbacks(
learning_rate_schedule, cifar_preprocessing.NUM_IMAGES['train']) learning_rate_schedule, cifar_preprocessing.NUM_IMAGES['train'])
......
...@@ -79,23 +79,6 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark): ...@@ -79,23 +79,6 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_force_v2(self):
"""Test Keras model with eager, dist_strat, force v2 and 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_force_v2')
FLAGS.dtype = 'fp32'
FLAGS.enable_eager = True
# Add some thread tunings to improve performance.
FLAGS.datasets_num_private_threads = 14
FLAGS.use_tensor_lr = True
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_8_gpu_fp16(self): def benchmark_8_gpu_fp16(self):
"""Test Keras model with eager, dist_strat, 8 GPUs, and fp16.""" """Test Keras model with eager, dist_strat, 8 GPUs, and fp16."""
self._setup() self._setup()
...@@ -294,8 +277,8 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -294,8 +277,8 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 64 FLAGS.batch_size = 64
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly(self): def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
"""Forced v2 execution in tf.compile path and force eager.""" """Forced v1 execution in tf.compile path and force eager."""
self._setup() self._setup()
FLAGS.num_gpus = 1 FLAGS.num_gpus = 1
...@@ -303,13 +286,13 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -303,13 +286,13 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.run_eagerly = True FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir( FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_dist_strat_run_eagerly') 'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
FLAGS.batch_size = 64 FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2_run_eagerly_tweaked(self): def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly_tweaked(self):
"""Forced v2 execution in tf.compile path and force eager.""" """Forced v1 execution in tf.compile path and force eager."""
self._setup() self._setup()
FLAGS.num_gpus = 1 FLAGS.num_gpus = 1
...@@ -318,22 +301,22 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -318,22 +301,22 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.explicit_gpu_placement = True FLAGS.explicit_gpu_placement = True
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir( FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_dist_strat_run_eagerly_tweaked') 'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly_tweaked')
FLAGS.batch_size = 64 FLAGS.batch_size = 64
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_force_v2(self): def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
"""No dist strat but forced v2 execution tf.compile path.""" """No dist strat but forced v1 execution tf.compile path."""
self._setup() self._setup()
FLAGS.num_gpus = 1 FLAGS.num_gpus = 1
FLAGS.enable_eager = True FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'off' FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir( FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_force_dist_strat') 'benchmark_1_gpu_no_dist_strat_force_v1_path')
FLAGS.batch_size = 128 FLAGS.batch_size = 128
FLAGS.force_v2_in_keras_compile = True FLAGS.force_v2_in_keras_compile = False
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self): def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
...@@ -572,18 +555,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -572,18 +555,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 * 8 # 8 GPUs FLAGS.batch_size = 128 * 8 # 8 GPUs
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_force_v2(self):
"""Test Keras model with 8 GPUs and v2 codepath."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_force_v2')
FLAGS.batch_size = 128 * 8 # 8 GPUs
FLAGS.force_v2_in_keras_compile = True
self._run_and_report_benchmark()
def benchmark_8_gpu_tweaked(self): def benchmark_8_gpu_tweaked(self):
"""Test Keras model with manual config tuning and 8 GPUs.""" """Test Keras model with manual config tuning and 8 GPUs."""
self._setup() self._setup()
......
...@@ -189,6 +189,9 @@ def run(flags_obj): ...@@ -189,6 +189,9 @@ def run(flags_obj):
model = resnet_model.resnet50( model = resnet_model.resnet50(
num_classes=imagenet_preprocessing.NUM_CLASSES, dtype=dtype) num_classes=imagenet_preprocessing.NUM_CLASSES, dtype=dtype)
# TODO(b/138957587): Remove when force_v2_in_keras_compile is no longer
# a valid arg for this model. Also remove as a valid flag.
if flags_obj.force_v2_in_keras_compile is not None:
model.compile( model.compile(
loss='sparse_categorical_crossentropy', loss='sparse_categorical_crossentropy',
optimizer=optimizer, optimizer=optimizer,
...@@ -196,6 +199,13 @@ def run(flags_obj): ...@@ -196,6 +199,13 @@ def run(flags_obj):
if flags_obj.report_accuracy_metrics else None), if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly, run_eagerly=flags_obj.run_eagerly,
experimental_run_tf_function=flags_obj.force_v2_in_keras_compile) experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)
else:
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
callbacks = keras_common.get_callbacks( callbacks = keras_common.get_callbacks(
learning_rate_schedule, imagenet_preprocessing.NUM_IMAGES['train']) learning_rate_schedule, imagenet_preprocessing.NUM_IMAGES['train'])
......
...@@ -282,7 +282,7 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True, ...@@ -282,7 +282,7 @@ def define_performance(num_parallel_calls=True, inter_op=True, intra_op=True,
if force_v2_in_keras_compile: if force_v2_in_keras_compile:
flags.DEFINE_boolean( flags.DEFINE_boolean(
name="force_v2_in_keras_compile", default=False, name="force_v2_in_keras_compile", default=None,
help="Forces the use of run_distribued path even if not" help="Forces the use of run_distribued path even if not"
"using a `strategy`. This is not the same as" "using a `strategy`. This is not the same as"
"`tf.distribute.OneDeviceStrategy`") "`tf.distribute.OneDeviceStrategy`")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment