Unverified Commit 2d4cfad0 authored by Igor, committed by GitHub

Set the --clone_model_in_keras_dist_strat to None. (#6781)

* Set the --clone_model_in_keras_dist_strat flag to None. Remove the separate no_cloning benchmarks and add a couple of cloning ones. Fix the learning rate schedule to cache its ops per graph.
parent 6aa6bac5
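The diff below caches the learning rate op per tf.Graph instead of holding a single cached op. A minimal sketch of that caching pattern, using a hypothetical CachedSchedule class with a constant base_lr (the repository's PiecewiseConstantDecayWithWarmup computes warmup plus piecewise-constant decay instead):

import tensorflow as tf


class CachedSchedule(object):
  """Illustrative learning rate schedule that caches its op per graph."""

  def __init__(self, base_lr=0.1):
    self.base_lr = base_lr
    # Keyed by tf.Graph so each graph gets its own learning rate op; a
    # single cached op cannot be safely reused across graphs.
    self.learning_rate_ops_cache = {}

  def __call__(self, step):
    if tf.executing_eagerly():
      return self._get_learning_rate(step)
    # In graph mode, build the op once per graph and reuse it afterwards.
    graph = tf.compat.v1.get_default_graph()
    if graph not in self.learning_rate_ops_cache:
      self.learning_rate_ops_cache[graph] = self._get_learning_rate(step)
    return self.learning_rate_ops_cache[graph]

  def _get_learning_rate(self, step):
    # Stand-in computation; the real schedule applies warmup followed by
    # piecewise-constant decay.
    return tf.constant(self.base_lr, dtype=tf.float32)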
@@ -121,22 +121,11 @@ class KerasNCFRealData(KerasNCFBenchmarkBase):
     self._setup()
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_cloning(self):
-    self._setup()
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpus(self):
     self._setup()
     FLAGS.num_gpus = 2
     self._run_and_report_benchmark()
-
-  def benchmark_2_gpus_no_cloning(self):
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
 
 
 class KerasNCFSyntheticData(KerasNCFBenchmarkBase):
   """Benchmark NCF model using synthetic data."""
@@ -175,18 +164,7 @@ class KerasNCFSyntheticData(KerasNCFBenchmarkBase):
     self._setup()
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_cloning(self):
-    self._setup()
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpus(self):
     self._setup()
     FLAGS.num_gpus = 2
     self._run_and_report_benchmark()
-
-  def benchmark_2_gpus_no_cloning(self):
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
@@ -92,19 +92,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.enable_eager = True
     self._run_and_report_benchmark()
 
-  def benchmark_2_gpu_no_cloning(self):
-    """Test keras based model with eager, distributed no-cloning."""
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.data_dir = self.data_dir
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_no_cloning')
-    FLAGS.dtype = 'fp32'
-    FLAGS.clone_model_in_keras_dist_strat = False
-    FLAGS.enable_eager = True
-    self._run_and_report_benchmark()
-
   def benchmark_graph_2_gpu(self):
     """Test keras based model with Keras fit and distribution strategies."""
     self._setup()
@@ -211,16 +198,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128 * 2  # 2 GPUs
     self._run_and_report_benchmark()
 
-  def benchmark_2_gpu_no_cloning(self):
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_no_cloning')
-    FLAGS.batch_size = 128 * 2  # 2 GPUs
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
-
   def benchmark_graph_2_gpu(self):
     self._setup()
     FLAGS.num_gpus = 2
@@ -101,7 +101,7 @@ class PiecewiseConstantDecayWithWarmup(
     self.compute_lr_on_cpu = compute_lr_on_cpu
     self.name = name
 
-    self.cached_learning_rate_op = None
+    self.learning_rate_ops_cache = {}
 
   def __call__(self, step):
     if tf.executing_eagerly():
@@ -110,13 +110,14 @@ class PiecewiseConstantDecayWithWarmup(
     # In an eager function or graph, the current implementation of optimizer
     # repeatedly call and thus create ops for the learning rate schedule. To
     # avoid this, we cache the ops if not executing eagerly.
-    if self.cached_learning_rate_op is None:
+    graph = tf.compat.v1.get_default_graph()
+    if graph not in self.learning_rate_ops_cache:
       if self.compute_lr_on_cpu:
         with tf.device('/device:CPU:0'):
-          self.cached_learning_rate_op = self._get_learning_rate(step)
+          self.learning_rate_ops_cache[graph] = self._get_learning_rate(step)
       else:
-        self.cached_learning_rate_op = self._get_learning_rate(step)
-    return self.cached_learning_rate_op
+        self.learning_rate_ops_cache[graph] = self._get_learning_rate(step)
+    return self.learning_rate_ops_cache[graph]
 
   def _get_learning_rate(self, step):
     """Compute learning rate at given step."""
@@ -368,7 +369,7 @@ def define_keras_flags():
       name='batchnorm_spatial_persistent', default=True,
       help='Enable the spacial persistent mode for CuDNN batch norm kernel.')
   flags.DEFINE_boolean(
-      name='clone_model_in_keras_dist_strat', default=True,
+      name='clone_model_in_keras_dist_strat', default=None,
       help='If False, then the experimental code path is used that doesn\'t '
       'clone models for distribution.')
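With the flag default changed from True to None, benchmarks that care about cloning now set the flag explicitly (see the cloning benchmarks added below); everything else leaves it unset. A standalone absl.flags sketch of that tri-state default, assuming a hypothetical script rather than the repository's define_keras_flags():

from absl import app
from absl import flags

flags.DEFINE_boolean(
    name='clone_model_in_keras_dist_strat', default=None,
    help='If False, use the experimental code path that does not clone '
         'models for distribution.')


def main(_):
  # Remains None unless --clone_model_in_keras_dist_strat or
  # --noclone_model_in_keras_dist_strat is passed on the command line.
  print('clone_model_in_keras_dist_strat =',
        flags.FLAGS.clone_model_in_keras_dist_strat)


if __name__ == '__main__':
  app.run(main)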
@@ -206,18 +206,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_cloning(self):
-    """Test Keras model with 1 GPU and no-cloning."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_cloning')
-    FLAGS.batch_size = 128
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
-
   def benchmark_xla_1_gpu(self):
     """Test Keras model with XLA and 1 GPU."""
     self._setup()
@@ -407,15 +395,15 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128 * 8  # 8 GPUs
     self._run_and_report_benchmark()
 
-  def benchmark_8_gpu_no_cloning(self):
-    """Test Keras model with 8 GPUs and no-cloning."""
+  def benchmark_8_gpu_cloning(self):
+    """Test Keras model with 8 GPUs and cloning."""
     self._setup()
     FLAGS.num_gpus = 8
     FLAGS.enable_eager = True
     FLAGS.distribution_strategy = 'default'
-    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_no_cloning')
-    FLAGS.clone_model_in_keras_dist_strat = False
+    FLAGS.clone_model_in_keras_dist_strat = True
+    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_cloning')
     FLAGS.batch_size = 128 * 8  # 8 GPUs
     self._run_and_report_benchmark()
@@ -469,6 +457,19 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 256 * 8  # 8 GPUs
     self._run_and_report_benchmark()
 
+  def benchmark_8_gpu_fp16_cloning(self):
+    """Test Keras model with 8 GPUs, fp16 and cloning."""
+    self._setup()
+    FLAGS.num_gpus = 8
+    FLAGS.dtype = 'fp16'
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'default'
+    FLAGS.clone_model_in_keras_dist_strat = True
+    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_cloning')
+    FLAGS.batch_size = 256 * 8  # 8 GPUs
+    self._run_and_report_benchmark()
+
   def benchmark_8_gpu_fp16_tweaked(self):
     """Test Keras model with 8 GPUs, fp16, and manual config tuning."""
     self._setup()
@@ -516,6 +517,20 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 256 * 8  # 8 GPUs
     self._run_and_report_benchmark()
 
+  def benchmark_xla_8_gpu_fp16_cloning(self):
+    """Test Keras model with XLA, 8 GPUs, fp16 and cloning."""
+    self._setup()
+    FLAGS.num_gpus = 8
+    FLAGS.dtype = 'fp16'
+    FLAGS.enable_eager = True
+    FLAGS.enable_xla = True
+    FLAGS.distribution_strategy = 'default'
+    FLAGS.clone_model_in_keras_dist_strat = True
+    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_cloning')
+    FLAGS.batch_size = 256 * 8  # 8 GPUs
+    self._run_and_report_benchmark()
+
   def benchmark_xla_8_gpu_fp16_tweaked(self):
     """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
     self._setup()
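Continuing the hypothetical CachedSchedule sketch from above, the per-graph cache means repeated calls inside one graph reuse the same op, while a new graph gets its own op rather than a tensor that belongs to a different graph:

schedule = CachedSchedule(base_lr=0.1)

with tf.Graph().as_default():
  lr_first = schedule(tf.constant(0, dtype=tf.int64))
  lr_second = schedule(tf.constant(1000, dtype=tf.int64))
  # Both calls return the op cached for this graph.
  assert lr_first is lr_second

with tf.Graph().as_default():
  # A different graph gets a freshly built op instead of a reference
  # into the previous graph.
  lr_other = schedule(tf.constant(0, dtype=tf.int64))
  assert lr_other is not lr_first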