Unverified Commit cf3c2407 authored by Haoyu Zhang, committed by GitHub

Improve performance of Keras ResNet models when not using distribution strategy (#7055)

* Do not set learning phase when skipping eval

* Do not set learning phase in no dist strat case

* Added device placement, tweaked benchmarks

* Added tweaked benchmarks for Cifar

* Fix device scope

* Fix lint

* Add explicit GPU placement flag

* Also run accuracy test with explicit GPU placement

* Added doc string
parent e0e6d981
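
In short, the change introduces two opt-in tweaks for the single-GPU, no-distribution-strategy path: skip the global Keras learning-phase override when evaluation is skipped, and pin the Keras training loop to the GPU with an explicit device scope. A minimal standalone sketch of the placement tweak (not the PR's code; it assumes TF 2.x eager execution, at least one visible GPU, and uses a throwaway model and data):

import tensorflow as tf

# Sketch only: without a distribution strategy, Keras does not open a device
# scope on its own, so the training loop is pinned to GPU:0 explicitly. This
# is the behavior the new explicit_gpu_placement flag enables in the diff below.
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer='sgd', loss='mse')
x, y = tf.ones((32, 4)), tf.ones((32, 1))

with tf.device('/device:GPU:0'):
    model.fit(x, y, epochs=1, verbose=0)
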
@@ -83,6 +83,7 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
    """Test keras based model with eager and no dist strat."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.explicit_gpu_placement = True
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
@@ -189,6 +190,19 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_tweaked(self):
    """Test no distribution strategy with manual config."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.explicit_gpu_placement = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.set_learning_phase_to_train = False
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_tweaked')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    self._setup()
    FLAGS.num_gpus = 1
...
...@@ -168,10 +168,19 @@ def run(flags_obj): ...@@ -168,10 +168,19 @@ def run(flags_obj):
validation_data = eval_input_dataset validation_data = eval_input_dataset
if flags_obj.skip_eval: if flags_obj.skip_eval:
if flags_obj.set_learning_phase_to_train:
# TODO(haoyuzhang): Understand slowdown of setting learning phase when
# not using distribution strategy.
tf.keras.backend.set_learning_phase(1) tf.keras.backend.set_learning_phase(1)
num_eval_steps = None num_eval_steps = None
validation_data = None validation_data = None
if not strategy and flags_obj.explicit_gpu_placement:
# TODO(b/135607227): Add device scope automatically in Keras training loop
# when not using distribition strategy.
no_dist_strat_device = tf.device('/device:GPU:0')
no_dist_strat_device.__enter__()
history = model.fit(train_input_dataset, history = model.fit(train_input_dataset,
epochs=train_epochs, epochs=train_epochs,
steps_per_epoch=train_steps, steps_per_epoch=train_steps,
...@@ -185,6 +194,10 @@ def run(flags_obj): ...@@ -185,6 +194,10 @@ def run(flags_obj):
eval_output = model.evaluate(eval_input_dataset, eval_output = model.evaluate(eval_input_dataset,
steps=num_eval_steps, steps=num_eval_steps,
verbose=2) verbose=2)
if not strategy and flags_obj.explicit_gpu_placement:
no_dist_strat_device.__exit__()
stats = keras_common.build_stats(history, eval_output, callbacks) stats = keras_common.build_stats(history, eval_output, callbacks)
return stats return stats
......
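
The device scope above is entered and exited manually, presumably to avoid reindenting the stretch of run() between fit() and evaluate(). A hypothetical equivalent using contextlib.ExitStack keeps the scope open across both calls without invoking dunder methods directly (throwaway model and data; a local boolean stands in for flags_obj.explicit_gpu_placement, and a visible GPU is assumed):

import contextlib

import tensorflow as tf

# Sketch only: ExitStack conditionally holds the device scope open for the
# duration of the with block, covering both fit() and evaluate().
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer='sgd', loss='mse')
x, y = tf.ones((32, 4)), tf.ones((32, 1))

explicit_gpu_placement = True  # stand-in for flags_obj.explicit_gpu_placement
with contextlib.ExitStack() as stack:
    if explicit_gpu_placement:
        stack.enter_context(tf.device('/device:GPU:0'))
    history = model.fit(x, y, epochs=1, verbose=0)
    eval_output = model.evaluate(x, y, verbose=0)

Note that the diff calls __exit__() with no arguments; this appears to rely on TF's eager device context accepting a variadic signature, whereas the standard context-manager protocol passes three arguments (exception type, value, traceback).
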
@@ -256,6 +256,15 @@ def define_keras_flags():
      name='run_eagerly', default=False,
      help='Run the model op by op without building a model function.')
  flags.DEFINE_boolean(name='skip_eval', default=False, help='Skip evaluation?')
  # TODO(b/135607288): Remove this flag once we understand the root cause of
  # the slowdown when setting the learning phase in the Keras backend.
  flags.DEFINE_boolean(
      name='set_learning_phase_to_train', default=True,
      help='If skipping eval, also set the Keras learning phase to 1 '
           '(training).')
  flags.DEFINE_boolean(
      name='explicit_gpu_placement', default=False,
      help='If not using a distribution strategy, explicitly set the device '
           'scope for the Keras training loop.')
  flags.DEFINE_boolean(name='use_trivial_model', default=False,
                       help='Whether to use a trivial Keras model.')
  flags.DEFINE_boolean(name='report_accuracy_metrics', default=True,
...
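
With these flags in place, the tweaked benchmark configuration can also be reproduced from the command line. For example (the script name keras_cifar_main.py is illustrative; substitute whichever Keras ResNet entry point defines these flags):

python keras_cifar_main.py \
    --num_gpus=1 \
    --enable_eager=true \
    --distribution_strategy=off \
    --set_learning_phase_to_train=false \
    --explicit_gpu_placement=true \
    --batch_size=128
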
@@ -234,6 +234,20 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_tweaked(self):
    """Test with 1 GPU, no distribution strategy, and manual tuning."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.explicit_gpu_placement = True
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.set_learning_phase_to_train = False
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_tweaked')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()
...
...@@ -226,10 +226,19 @@ def run(flags_obj): ...@@ -226,10 +226,19 @@ def run(flags_obj):
# Only build the training graph. This reduces memory usage introduced by # Only build the training graph. This reduces memory usage introduced by
# control flow ops in layers that have different implementations for # control flow ops in layers that have different implementations for
# training and inference (e.g., batch norm). # training and inference (e.g., batch norm).
if flags_obj.set_learning_phase_to_train:
# TODO(haoyuzhang): Understand slowdown of setting learning phase when
# not using distribution strategy.
tf.keras.backend.set_learning_phase(1) tf.keras.backend.set_learning_phase(1)
num_eval_steps = None num_eval_steps = None
validation_data = None validation_data = None
if not strategy and flags_obj.explicit_gpu_placement:
# TODO(b/135607227): Add device scope automatically in Keras training loop
# when not using distribition strategy.
no_dist_strat_device = tf.device('/device:GPU:0')
no_dist_strat_device.__enter__()
history = model.fit(train_input_dataset, history = model.fit(train_input_dataset,
epochs=train_epochs, epochs=train_epochs,
steps_per_epoch=train_steps, steps_per_epoch=train_steps,
...@@ -244,6 +253,10 @@ def run(flags_obj): ...@@ -244,6 +253,10 @@ def run(flags_obj):
eval_output = model.evaluate(eval_input_dataset, eval_output = model.evaluate(eval_input_dataset,
steps=num_eval_steps, steps=num_eval_steps,
verbose=2) verbose=2)
if not strategy and flags_obj.explicit_gpu_placement:
no_dist_strat_device.__exit__()
stats = keras_common.build_stats(history, eval_output, callbacks) stats = keras_common.build_stats(history, eval_output, callbacks)
return stats return stats
......
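
For reference, the call gated by set_learning_phase_to_train has global effect despite being a one-liner. A minimal sketch of what pinning the phase does (Keras backend API of the TF 1.x/early-2.x era):

import tensorflow as tf

# Pinning the global learning phase to 1 (training) makes layers with distinct
# train/inference behavior, such as BatchNormalization and Dropout, always
# take their training branch, so only the training graph is built.
tf.keras.backend.set_learning_phase(1)
assert tf.keras.backend.learning_phase() == 1

As the TODOs above note, this unexpectedly slows down the no-distribution-strategy path, which is why the flag defaults to True but the tweaked benchmarks turn it off.
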