"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "d906b1357be51631fa934d61fe13d2e7fc3823bc"
Unverified commit cf3c2407, authored by Haoyu Zhang, committed by GitHub

Improve performance of Keras ResNet models when not using distribution strategy (#7055)

* Do not set learning phase when skipping eval

* Do not set learning phase in no dist strat case

* Added device placement, tweaked benchmarks

* Added tweaked benchmarks for Cifar

* Fix device scope

* Fix lint

* Add explicit GPU placement flag

* Also run accuracy test with explicit GPU placement

* Added doc string
parent e0e6d981
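In short, the commit makes two things optional when training without a distribution strategy: forcing the global Keras learning phase, and wrapping the training loop in an explicit GPU device scope. A minimal sketch of the device-scope idea follows, using a toy model and synthetic data rather than the ResNet code touched by this commit; the model, dataset, and the assumption that a GPU is available are all illustrative.

import tensorflow as tf

# Toy stand-ins for the real ResNet model and input pipeline (illustrative only).
model = tf.keras.Sequential([tf.keras.layers.Dense(10, activation='softmax')])
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy')
dataset = tf.data.Dataset.from_tensor_slices(
    (tf.random.normal([256, 32]),
     tf.random.uniform([256], maxval=10, dtype=tf.int64))).batch(32)

# Without a distribution strategy, pin the whole training loop to one GPU
# instead of relying on per-op placement (assumes '/device:GPU:0' exists).
with tf.device('/device:GPU:0'):
  model.fit(dataset, epochs=1)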
@@ -83,6 +83,7 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     """Test keras based model with eager and no dist strat."""
     self._setup()
     FLAGS.num_gpus = 1
+    FLAGS.explicit_gpu_placement = True
     FLAGS.data_dir = self.data_dir
     FLAGS.batch_size = 128
     FLAGS.train_epochs = 182
@@ -189,6 +190,19 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_no_dist_strat_tweaked(self):
+    """Test no distribution strategy with manual config."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.enable_eager = True
+    FLAGS.explicit_gpu_placement = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.set_learning_phase_to_train = False
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_no_dist_strat_tweaked')
+    FLAGS.batch_size = 128
+    self._run_and_report_benchmark()
+
   def benchmark_graph_1_gpu_no_dist_strat(self):
     self._setup()
     FLAGS.num_gpus = 1
...
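The new benchmark_1_gpu_no_dist_strat_tweaked method above is just a bundle of flag overrides, so the same configuration can be reproduced by hand. Below is a sketch of an equivalent command-line invocation; the script path and the data/model directories are assumptions for illustration, not part of this diff.

python keras_cifar_main.py \
    --num_gpus=1 \
    --enable_eager=true \
    --distribution_strategy=off \
    --explicit_gpu_placement=true \
    --set_learning_phase_to_train=false \
    --batch_size=128 \
    --data_dir=/path/to/cifar10 \
    --model_dir=/tmp/resnet56_no_dist_strat_tweaked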
@@ -168,10 +168,19 @@ def run(flags_obj):
   validation_data = eval_input_dataset
   if flags_obj.skip_eval:
-    tf.keras.backend.set_learning_phase(1)
+    if flags_obj.set_learning_phase_to_train:
+      # TODO(haoyuzhang): Understand slowdown of setting learning phase when
+      # not using distribution strategy.
+      tf.keras.backend.set_learning_phase(1)
     num_eval_steps = None
     validation_data = None

+  if not strategy and flags_obj.explicit_gpu_placement:
+    # TODO(b/135607227): Add device scope automatically in Keras training loop
+    # when not using distribution strategy.
+    no_dist_strat_device = tf.device('/device:GPU:0')
+    no_dist_strat_device.__enter__()
+
   history = model.fit(train_input_dataset,
                       epochs=train_epochs,
                       steps_per_epoch=train_steps,
@@ -185,6 +194,10 @@ def run(flags_obj):
     eval_output = model.evaluate(eval_input_dataset,
                                  steps=num_eval_steps,
                                  verbose=2)

+  if not strategy and flags_obj.explicit_gpu_placement:
+    no_dist_strat_device.__exit__()
+
   stats = keras_common.build_stats(history, eval_output, callbacks)
   return stats
...
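The device scope is opened with __enter__ and closed much later with __exit__ by hand because model.fit() and the subsequent model.evaluate() both need to run inside the same scope, with unrelated code in between. A more idiomatic way to express the same pattern is contextlib.ExitStack; the sketch below is an illustration of that alternative, not code from this commit, and the helper name and arguments are made up.

import contextlib
import tensorflow as tf

def fit_and_maybe_evaluate(model, train_ds, eval_ds, strategy,
                           explicit_gpu_placement):
  """Runs fit() and evaluate() under one optional device scope (illustrative)."""
  with contextlib.ExitStack() as stack:
    if not strategy and explicit_gpu_placement:
      # The scope stays open for everything inside the `with` block and closes
      # automatically on exit, covering both fit() and evaluate().
      stack.enter_context(tf.device('/device:GPU:0'))
    history = model.fit(train_ds, epochs=1)
    eval_output = model.evaluate(eval_ds) if eval_ds is not None else None
  return history, eval_output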
@@ -256,6 +256,15 @@ def define_keras_flags():
       name='run_eagerly', default=False,
       help='Run the model op by op without building a model function.')
   flags.DEFINE_boolean(name='skip_eval', default=False, help='Skip evaluation?')
+  # TODO(b/135607288): Remove this flag once we understand the root cause of
+  # slowdown when setting the learning phase in Keras backend.
+  flags.DEFINE_boolean(
+      name='set_learning_phase_to_train', default=True,
+      help='If skip eval, also set Keras learning phase to 1 (training).')
+  flags.DEFINE_boolean(
+      name='explicit_gpu_placement', default=False,
+      help='If not using distribution strategy, explicitly set device scope '
+      'for the Keras training loop.')
   flags.DEFINE_boolean(name='use_trivial_model', default=False,
                        help='Whether to use a trivial Keras model.')
   flags.DEFINE_boolean(name='report_accuracy_metrics', default=True,
...
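Both new flags are ordinary absl boolean flags, so on the command line they accept the --flag=true/false form as well as the --flag/--noflag shorthand. A small standalone sketch of how they parse (a demo script, not the benchmark code):

from absl import app, flags

flags.DEFINE_boolean(
    name='set_learning_phase_to_train', default=True,
    help='If skip eval, also set Keras learning phase to 1 (training).')
flags.DEFINE_boolean(
    name='explicit_gpu_placement', default=False,
    help='If not using distribution strategy, explicitly set device scope '
         'for the Keras training loop.')

FLAGS = flags.FLAGS

def main(_):
  # e.g. python demo.py --noset_learning_phase_to_train --explicit_gpu_placement
  print(FLAGS.set_learning_phase_to_train, FLAGS.explicit_gpu_placement)

if __name__ == '__main__':
  app.run(main)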
@@ -234,6 +234,20 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_no_dist_strat_tweaked(self):
+    """Test with 1 GPU, no distribution strategy, and manual tuning."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.explicit_gpu_placement = True
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'off'
+    FLAGS.set_learning_phase_to_train = False
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_no_dist_strat_tweaked')
+    FLAGS.batch_size = 128
+    self._run_and_report_benchmark()
+
   def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
     """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
     self._setup()
...
@@ -226,10 +226,19 @@ def run(flags_obj):
     # Only build the training graph. This reduces memory usage introduced by
     # control flow ops in layers that have different implementations for
     # training and inference (e.g., batch norm).
-    tf.keras.backend.set_learning_phase(1)
+    if flags_obj.set_learning_phase_to_train:
+      # TODO(haoyuzhang): Understand slowdown of setting learning phase when
+      # not using distribution strategy.
+      tf.keras.backend.set_learning_phase(1)
     num_eval_steps = None
     validation_data = None

+  if not strategy and flags_obj.explicit_gpu_placement:
+    # TODO(b/135607227): Add device scope automatically in Keras training loop
+    # when not using distribution strategy.
+    no_dist_strat_device = tf.device('/device:GPU:0')
+    no_dist_strat_device.__enter__()
+
   history = model.fit(train_input_dataset,
                       epochs=train_epochs,
                       steps_per_epoch=train_steps,
@@ -244,6 +253,10 @@ def run(flags_obj):
     eval_output = model.evaluate(eval_input_dataset,
                                  steps=num_eval_steps,
                                  verbose=2)

+  if not strategy and flags_obj.explicit_gpu_placement:
+    no_dist_strat_device.__exit__()
+
   stats = keras_common.build_stats(history, eval_output, callbacks)
   return stats
...
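For context on why set_learning_phase_to_train matters: tf.keras.backend.set_learning_phase(1) globally forces layers with different train/inference behavior (dropout, batch norm) into training mode, even when a layer is called outside fit(). A toy illustration with a Dropout layer follows; it applies to the TF 2.x versions contemporary with this commit (set_learning_phase is deprecated in later releases).

import numpy as np
import tensorflow as tf

dropout = tf.keras.layers.Dropout(0.5)
x = np.ones((1, 8), dtype='float32')

# Default: a direct layer call behaves like inference, so dropout is a no-op.
print(dropout(x).numpy())  # all ones

# Forcing the global learning phase to "train" activates dropout everywhere:
# roughly half the entries are zeroed and the rest scaled by 1/(1 - 0.5).
tf.keras.backend.set_learning_phase(1)
print(dropout(x).numpy())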