Unverified commit 2d4cfad0, authored by Igor, committed by GitHub

Set the --clone_model_in_keras_dist_strat to None. (#6781)

* Set the --clone_model_in_keras_dist_strat default to None. Remove the separate no_cloning benchmarks and add a couple of cloning ones. Fix the learning rate schedule to cache its ops per graph.
parent 6aa6bac5
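
For context on the diff below: every benchmark method follows the same pattern of resetting flags, overriding a handful, and delegating to a shared reporting helper. A minimal sketch of that pattern, assuming absl flags (ExampleBenchmark and its flag handling are illustrative stand-ins, not the real keras_benchmark.KerasBenchmark):

    # Sketch of the FLAGS-driven benchmark pattern used throughout this
    # diff. ExampleBenchmark is hypothetical; the real classes inherit
    # from keras_benchmark.KerasBenchmark and report real metrics.
    from absl import flags

    flags.DEFINE_integer('num_gpus', 1, 'Number of GPUs to use.')
    flags.DEFINE_boolean(
        'clone_model_in_keras_dist_strat', None,
        'Whether to clone the model under a distribution strategy; '
        'None defers to the library default.')
    FLAGS = flags.FLAGS


    class ExampleBenchmark:

      def _setup(self):
        # The real _setup() restores every flag to its default so each
        # benchmark method starts from a clean slate.
        FLAGS.unparse_flags()
        FLAGS(['example'])  # re-parse so flag access sees defaults

      def _run_and_report_benchmark(self):
        # Stand-in for the helper that trains and reports metrics.
        print('num_gpus=%d cloning=%s' %
              (FLAGS.num_gpus, FLAGS.clone_model_in_keras_dist_strat))

      def benchmark_2_gpus_cloning(self):  # hypothetical method
        self._setup()
        FLAGS.num_gpus = 2
        FLAGS.clone_model_in_keras_dist_strat = True  # opt in explicitly
        self._run_and_report_benchmark()


    ExampleBenchmark().benchmark_2_gpus_cloning()
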
@@ -121,22 +121,11 @@ class KerasNCFRealData(KerasNCFBenchmarkBase):
     self._setup()
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_cloning(self):
-    self._setup()
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpus(self):
     self._setup()
     FLAGS.num_gpus = 2
     self._run_and_report_benchmark()
 
-  def benchmark_2_gpus_no_cloning(self):
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
-
 
 class KerasNCFSyntheticData(KerasNCFBenchmarkBase):
   """Benchmark NCF model using synthetic data."""
@@ -175,18 +164,7 @@ class KerasNCFSyntheticData(KerasNCFBenchmarkBase):
     self._setup()
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_cloning(self):
-    self._setup()
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
-
   def benchmark_2_gpus(self):
     self._setup()
     FLAGS.num_gpus = 2
     self._run_and_report_benchmark()
 
-  def benchmark_2_gpus_no_cloning(self):
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
@@ -92,19 +92,6 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
     FLAGS.enable_eager = True
     self._run_and_report_benchmark()
 
-  def benchmark_2_gpu_no_cloning(self):
-    """Test keras based model with eager, distributed no-cloning."""
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.data_dir = self.data_dir
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_no_cloning')
-    FLAGS.dtype = 'fp32'
-    FLAGS.clone_model_in_keras_dist_strat = False
-    FLAGS.enable_eager = True
-    self._run_and_report_benchmark()
-
   def benchmark_graph_2_gpu(self):
     """Test keras based model with Keras fit and distribution strategies."""
     self._setup()
@@ -211,16 +198,6 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128 * 2  # 2 GPUs
     self._run_and_report_benchmark()
 
-  def benchmark_2_gpu_no_cloning(self):
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_no_cloning')
-    FLAGS.batch_size = 128 * 2  # 2 GPUs
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
-
   def benchmark_graph_2_gpu(self):
     self._setup()
     FLAGS.num_gpus = 2
...
@@ -101,7 +101,7 @@ class PiecewiseConstantDecayWithWarmup(
     self.compute_lr_on_cpu = compute_lr_on_cpu
     self.name = name
-    self.cached_learning_rate_op = None
+    self.learning_rate_ops_cache = {}
 
   def __call__(self, step):
     if tf.executing_eagerly():
@@ -110,13 +110,14 @@ class PiecewiseConstantDecayWithWarmup(
     # In an eager function or graph, the current implementation of optimizer
     # repeatedly call and thus create ops for the learning rate schedule. To
     # avoid this, we cache the ops if not executing eagerly.
-    if self.cached_learning_rate_op is None:
+    graph = tf.compat.v1.get_default_graph()
+    if graph not in self.learning_rate_ops_cache:
       if self.compute_lr_on_cpu:
         with tf.device('/device:CPU:0'):
-          self.cached_learning_rate_op = self._get_learning_rate(step)
+          self.learning_rate_ops_cache[graph] = self._get_learning_rate(step)
       else:
-        self.cached_learning_rate_op = self._get_learning_rate(step)
-    return self.cached_learning_rate_op
+        self.learning_rate_ops_cache[graph] = self._get_learning_rate(step)
+    return self.learning_rate_ops_cache[graph]
 
   def _get_learning_rate(self, step):
     """Compute learning rate at given step."""
@@ -368,7 +369,7 @@ def define_keras_flags():
       name='batchnorm_spatial_persistent', default=True,
       help='Enable the spacial persistent mode for CuDNN batch norm kernel.')
   flags.DEFINE_boolean(
-      name='clone_model_in_keras_dist_strat', default=True,
+      name='clone_model_in_keras_dist_strat', default=None,
       help='If False, then the experimental code path is used that doesn\'t '
            'clone models for distribution.')
...
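
Changing the default from True to None makes the flag tri-state: an explicit True or False forces a code path, while None leaves the choice to the library. A hedged sketch of how such a flag could be resolved (the fallback rule is an assumption for illustration; this commit does not show how the None case is consumed):

    # Illustrative tri-state flag resolution; should_clone_model and its
    # library_default fallback are assumptions, not code from this commit.
    from absl import flags

    flags.DEFINE_boolean(
        name='clone_model_in_keras_dist_strat', default=None,
        help='If False, then the experimental code path is used that '
             'doesn\'t clone models for distribution.')
    FLAGS = flags.FLAGS


    def should_clone_model(library_default=True):
      # An explicit command-line True/False wins; None defers.
      if FLAGS.clone_model_in_keras_dist_strat is not None:
        return FLAGS.clone_model_in_keras_dist_strat
      return library_default


    FLAGS(['example'])                             # flag not passed: None
    print(should_clone_model())                    # True (library default)
    FLAGS.clone_model_in_keras_dist_strat = False
    print(should_clone_model())                    # False (explicit override)
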
@@ -206,18 +206,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128
     self._run_and_report_benchmark()
 
-  def benchmark_1_gpu_no_cloning(self):
-    """Test Keras model with 1 GPU and no-cloning."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_cloning')
-    FLAGS.batch_size = 128
-    FLAGS.clone_model_in_keras_dist_strat = False
-    self._run_and_report_benchmark()
-
   def benchmark_xla_1_gpu(self):
     """Test Keras model with XLA and 1 GPU."""
     self._setup()
@@ -407,15 +395,15 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128 * 8  # 8 GPUs
     self._run_and_report_benchmark()
 
-  def benchmark_8_gpu_no_cloning(self):
-    """Test Keras model with 8 GPUs and no-cloning."""
+  def benchmark_8_gpu_cloning(self):
+    """Test Keras model with 8 GPUs and cloning."""
     self._setup()
     FLAGS.num_gpus = 8
     FLAGS.enable_eager = True
     FLAGS.distribution_strategy = 'default'
-    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_no_cloning')
-    FLAGS.clone_model_in_keras_dist_strat = False
+    FLAGS.clone_model_in_keras_dist_strat = True
+    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_cloning')
     FLAGS.batch_size = 128 * 8  # 8 GPUs
     self._run_and_report_benchmark()
@@ -469,6 +457,19 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 256 * 8  # 8 GPUs
     self._run_and_report_benchmark()
 
+  def benchmark_8_gpu_fp16_cloning(self):
+    """Test Keras model with 8 GPUs, fp16 and cloning."""
+    self._setup()
+    FLAGS.num_gpus = 8
+    FLAGS.dtype = 'fp16'
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'default'
+    FLAGS.clone_model_in_keras_dist_strat = True
+    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_cloning')
+    FLAGS.batch_size = 256 * 8  # 8 GPUs
+    self._run_and_report_benchmark()
+
   def benchmark_8_gpu_fp16_tweaked(self):
     """Test Keras model with 8 GPUs, fp16, and manual config tuning."""
     self._setup()
@@ -516,6 +517,20 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 256 * 8  # 8 GPUs
     self._run_and_report_benchmark()
 
+  def benchmark_xla_8_gpu_fp16_cloning(self):
+    """Test Keras model with XLA, 8 GPUs, fp16 and cloning."""
+    self._setup()
+    FLAGS.num_gpus = 8
+    FLAGS.dtype = 'fp16'
+    FLAGS.enable_eager = True
+    FLAGS.enable_xla = True
+    FLAGS.distribution_strategy = 'default'
+    FLAGS.clone_model_in_keras_dist_strat = True
+    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_cloning')
+    FLAGS.batch_size = 256 * 8  # 8 GPUs
+    self._run_and_report_benchmark()
+
   def benchmark_xla_8_gpu_fp16_tweaked(self):
     """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
     self._setup()
...