"examples/git@developer.sourcefind.cn:orangecat/ollama.git" did not exist on "29715dbca7dec710bf7127e5d788c8009e892735"
Unverified commit 6f47c378 authored by Igor, committed by GitHub

Merged commit includes the following changes (#7264):

259030078  by isaprykin<isaprykin@google.com>:

    Clean up the --clone_model_in_keras_dist_strat from Keras Resnet.

    The cloning flag has been removed. The current rule is that cloning is only done in graph mode, which made the eager benchmarks duplicates: eager+no-cloning vs. eager+cloning. The eager+cloning ones have been removed.

--
259026454  by isaprykin<isaprykin@google.com>:

    Internal change

PiperOrigin-RevId: 259030078
parent c5a4978d
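
The rule described in the commit message can be illustrated with a minimal sketch (a hypothetical helper written for this note, not code from the commit): with the flag gone, whether a model is cloned for distribution follows the execution mode rather than a user-settable option.

import tensorflow as tf

def should_clone_model():
  # Hypothetical illustration of the rule above: cloning is now done only
  # in graph mode, so the decision tracks the execution mode instead of
  # the removed --clone_model_in_keras_dist_strat flag.
  return not tf.executing_eagerly()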
@@ -306,10 +306,6 @@ def define_keras_flags(dynamic_loss_scale=True):
   flags.DEFINE_boolean(
       name='batchnorm_spatial_persistent', default=True,
       help='Enable the spacial persistent mode for CuDNN batch norm kernel.')
-  flags.DEFINE_boolean(
-      name='clone_model_in_keras_dist_strat', default=None,
-      help='If False, then the experimental code path is used that doesn\'t '
-      'clone models for distribution.')
   flags.DEFINE_boolean(
       name='enable_get_next_as_optional', default=False,
       help='Enable get_next_as_optional behavior in DistributedIterator.')
@@ -534,18 +534,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.batch_size = 128 * 8  # 8 GPUs
     self._run_and_report_benchmark()
 
-  def benchmark_8_gpu_cloning(self):
-    """Test Keras model with 8 GPUs and cloning."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.clone_model_in_keras_dist_strat = True
-    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_cloning')
-    FLAGS.batch_size = 128 * 8  # 8 GPUs
-    self._run_and_report_benchmark()
-
   def benchmark_8_gpu_tweaked(self):
     """Test Keras model with manual config tuning and 8 GPUs."""
     self._setup()
@@ -624,19 +612,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()
 
-  def benchmark_8_gpu_fp16_cloning(self):
-    """Test Keras model with 8 GPUs, fp16 and cloning."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.dtype = 'fp16'
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.clone_model_in_keras_dist_strat = True
-    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_cloning')
-    FLAGS.batch_size = 256 * 8  # 8 GPUs
-    self._run_and_report_benchmark()
-
   def benchmark_8_gpu_fp16_tweaked(self):
     """Test Keras model with 8 GPUs, fp16, and manual config tuning."""
     self._setup()
@@ -669,23 +644,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()
 
-  def benchmark_8_gpu_fp16_cloning_tweaked(self):
-    """Test Keras model with 8 GPUs, fp16, cloning, and manual config tuning."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.dtype = 'fp16'
-    FLAGS.enable_eager = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.clone_model_in_keras_dist_strat = True
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_8_gpu_fp16_cloning_tweaked')
-    FLAGS.batch_size = 256 * 8
-    FLAGS.use_tensor_lr = True
-    FLAGS.tf_gpu_thread_mode = 'gpu_private'
-    FLAGS.data_delay_prefetch = True
-    self._run_and_report_benchmark()
-
   def benchmark_8_gpu_fp16_dynamic_tweaked(self):
     """Test Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned."""
     self._setup()
@@ -748,20 +706,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()
 
-  def benchmark_xla_8_gpu_fp16_cloning(self):
-    """Test Keras model with XLA, 8 GPUs, fp16 and cloning."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.dtype = 'fp16'
-    FLAGS.enable_eager = True
-    FLAGS.enable_xla = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.clone_model_in_keras_dist_strat = True
-    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_cloning')
-    FLAGS.batch_size = 256 * 8  # 8 GPUs
-    self._run_and_report_benchmark()
-
   def benchmark_xla_8_gpu_fp16_tweaked(self):
     """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
     self._setup()
@@ -778,26 +722,8 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.datasets_num_private_threads = 48
     self._run_and_report_benchmark()
 
-  def benchmark_xla_8_gpu_fp16_cloning_tweaked(self):
-    """Test with manual config tuning, XLA, 8 GPUs, fp16, and cloning."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.dtype = 'fp16'
-    FLAGS.enable_eager = True
-    FLAGS.enable_xla = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.clone_model_in_keras_dist_strat = True
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_xla_8_gpu_fp16_cloning_tweaked')
-    FLAGS.batch_size = 256 * 8
-    FLAGS.use_tensor_lr = True
-    # FLAGS.tf_gpu_thread_mode = 'gpu_private'
-    FLAGS.data_delay_prefetch = True
-    self._run_and_report_benchmark()
-
-  def benchmark_xla_8_gpu_fp16_cloning_tweaked_layout_off(self):
-    """Test with tuning, FP16+XLA, cloning, and layout_off."""
+  def benchmark_xla_8_gpu_fp16_tweaked_layout_off(self):
+    """Test with tuning, FP16+XLA, and layout_off."""
     self._setup()
     FLAGS.num_gpus = 8
@@ -805,37 +731,14 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.enable_eager = True
     FLAGS.enable_xla = True
     FLAGS.distribution_strategy = 'default'
-    FLAGS.clone_model_in_keras_dist_strat = True
     FLAGS.model_dir = self._get_model_dir(
-        'benchmark_xla_8_gpu_fp16_cloning_tweaked_layout_off')
+        'benchmark_xla_8_gpu_fp16_tweaked_layout_off')
     FLAGS.batch_size = 256 * 8
     FLAGS.use_tensor_lr = True
     FLAGS.enable_grappler_layout_optimizer = False
     FLAGS.data_format = 'channels_last'
     self._run_and_report_benchmark()
 
-  def benchmark_xla_8_gpu_fp16_cloning_tweaked_optional_next(self):
-    """Test with manual config tuning, XLA, 8 GPUs, fp16, and cloning.
-
-    This test also enables get_next_as_optional.
-    """
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.dtype = 'fp16'
-    FLAGS.enable_eager = True
-    FLAGS.enable_xla = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.clone_model_in_keras_dist_strat = True
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_xla_8_gpu_fp16_cloning_tweaked_optional_next')
-    FLAGS.batch_size = 256 * 8
-    FLAGS.use_tensor_lr = True
-    # FLAGS.tf_gpu_thread_mode = 'gpu_private'
-    FLAGS.data_delay_prefetch = True
-    FLAGS.enable_get_next_as_optional = True
-    self._run_and_report_benchmark()
-
   def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self):
     """Test with manual config tuning, XLA, 8 GPUs and fp16.
@@ -856,28 +759,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
     FLAGS.train_steps = 310
     self._run_and_report_benchmark()
 
-  def benchmark_xla_8_gpu_fp16_cloning_tweaked_delay_measure(self):
-    """Test with manual config tuning, XLA, 8 GPUs, fp16, and cloning.
-
-    Delay performance measurement for stable performance on 96 vCPU platforms.
-    """
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.dtype = 'fp16'
-    FLAGS.enable_eager = True
-    FLAGS.enable_xla = True
-    FLAGS.distribution_strategy = 'default'
-    FLAGS.clone_model_in_keras_dist_strat = True
-    FLAGS.model_dir = self._get_model_dir(
-        'benchmark_xla_8_gpu_fp16_cloning_tweaked_delay_measure')
-    FLAGS.batch_size = 256 * 8
-    FLAGS.use_tensor_lr = True
-    FLAGS.tf_gpu_thread_mode = 'gpu_private'
-    FLAGS.data_delay_prefetch = True
-    FLAGS.train_steps = 310
-    self._run_and_report_benchmark()
-
   def benchmark_xla_8_gpu_fp16_tweaked_optional_next(self):
     """Test Keras model with manual config tuning, XLA, 8 GPUs, fp16.
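
For reference, the surviving eager-mode counterpart that each removed *_cloning benchmark duplicated presumably differs only by the cloning flag; below is a sketch of benchmark_8_gpu reconstructed from the diff context above (an assumption for illustration, not verbatim from the repository):

  def benchmark_8_gpu(self):
    """Test Keras model with 8 GPUs, eager, and no model cloning."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()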