Unverified commit 152baba5, authored by Haoyu Zhang and committed by GitHub
Browse files

Modify tweaked tests for better performance in no cloning mode (#6965)

* Modify tweaked tests for better performance in no cloning mode

* Tweak trivial models
parent e4bf28fc
...@@ -139,9 +139,8 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark): ...@@ -139,9 +139,8 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
FLAGS.dtype = 'fp16' FLAGS.dtype = 'fp16'
FLAGS.enable_eager = True FLAGS.enable_eager = True
FLAGS.enable_xla = True FLAGS.enable_xla = True
# Tweaks to improve performance.
FLAGS.data_delay_prefetch = True
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private'
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_mlperf_like(self): def benchmark_8_gpu_mlperf_like(self):
...@@ -342,7 +341,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -342,7 +341,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 256 FLAGS.batch_size = 256
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.data_delay_prefetch = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_xla_1_gpu_fp16_slack(self): def benchmark_xla_1_gpu_fp16_slack(self):
...@@ -491,7 +489,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -491,7 +489,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 * 8 # 8 GPUs FLAGS.batch_size = 128 * 8 # 8 GPUs
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
FLAGS.datasets_num_private_threads = 14 FLAGS.datasets_num_private_threads = 14
FLAGS.data_delay_prefetch = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_slack(self): def benchmark_8_gpu_slack(self):
...@@ -531,7 +528,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -531,7 +528,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.datasets_num_private_threads = 24 FLAGS.datasets_num_private_threads = 24
FLAGS.data_delay_prefetch = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_fp16(self): def benchmark_8_gpu_fp16(self):
...@@ -571,7 +567,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -571,7 +567,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 256 * 8 # 8 GPUs FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.data_delay_prefetch = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_fp16_cloning_tweaked(self): def benchmark_8_gpu_fp16_cloning_tweaked(self):
...@@ -605,7 +600,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -605,7 +600,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.loss_scale = 'dynamic' FLAGS.loss_scale = 'dynamic'
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.data_delay_prefetch = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16_optional_next(self): def benchmark_xla_8_gpu_fp16_optional_next(self):
...@@ -665,8 +659,8 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -665,8 +659,8 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked') FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked')
FLAGS.batch_size = 256 * 8 # 8 GPUs FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
# FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.data_delay_prefetch = True FLAGS.datasets_num_private_threads = 48
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16_cloning_tweaked(self): def benchmark_xla_8_gpu_fp16_cloning_tweaked(self):
...@@ -723,10 +717,9 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -723,10 +717,9 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.distribution_strategy = 'default' FLAGS.distribution_strategy = 'default'
FLAGS.model_dir = self._get_model_dir( FLAGS.model_dir = self._get_model_dir(
'benchmark_xla_8_gpu_fp16_tweaked_delay_measure') 'benchmark_xla_8_gpu_fp16_tweaked_delay_measure')
FLAGS.batch_size = 256 * 8 # 8 GPUs FLAGS.batch_size = 256 * 8
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.data_delay_prefetch = True
FLAGS.train_steps = 310 FLAGS.train_steps = 310
self._run_and_report_benchmark() self._run_and_report_benchmark()
...@@ -768,8 +761,8 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -768,8 +761,8 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
'benchmark_xla_8_gpu_fp16_tweaked_optional_next') 'benchmark_xla_8_gpu_fp16_tweaked_optional_next')
FLAGS.batch_size = 256 * 8 # 8 GPUs FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
# FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.data_delay_prefetch = True FLAGS.datasets_num_private_threads = 48
FLAGS.enable_get_next_as_optional = True FLAGS.enable_get_next_as_optional = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
...@@ -805,7 +798,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -805,7 +798,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.loss_scale = 'dynamic' FLAGS.loss_scale = 'dynamic'
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.data_delay_prefetch = True FLAGS.datasets_num_private_threads = 48
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16_tensorboard_tweaked(self): def benchmark_xla_8_gpu_fp16_tensorboard_tweaked(self):
...@@ -822,7 +815,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -822,7 +815,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 256 * 8 # 8 GPUs FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.use_tensor_lr = True FLAGS.use_tensor_lr = True
FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.data_delay_prefetch = True FLAGS.datasets_num_private_threads = 48
FLAGS.enable_tensorboard = True FLAGS.enable_tensorboard = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
...@@ -1044,8 +1037,10 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark): ...@@ -1044,8 +1037,10 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True) lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
] ]
def_flags = {} def_flags = {}
def_flags['use_trivial_model'] = True
def_flags['skip_eval'] = True def_flags['skip_eval'] = True
def_flags['report_accuracy_metrics'] = False def_flags['report_accuracy_metrics'] = False
def_flags['use_tensor_lr'] = True
def_flags['dtype'] = 'fp16' def_flags['dtype'] = 'fp16'
def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet') def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
def_flags['train_steps'] = 600 def_flags['train_steps'] = 600
...@@ -1075,7 +1070,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark): ...@@ -1075,7 +1070,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
FLAGS.num_gpus = 8 FLAGS.num_gpus = 8
FLAGS.enable_eager = True FLAGS.enable_eager = True
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_warmup') FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_warmup')
FLAGS.batch_size = 256 FLAGS.batch_size = 256 * 8
FLAGS.train_steps = 700 FLAGS.train_steps = 700
self._run_and_report_benchmark() self._run_and_report_benchmark()
...@@ -1124,21 +1119,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark): ...@@ -1124,21 +1119,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked') FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
FLAGS.batch_size = 256 * 8 FLAGS.batch_size = 256 * 8
FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.data_delay_prefetch = True FLAGS.datasets_num_private_threads = 48
self._run_and_report_benchmark()
def benchmark_8_gpu_slack(self):
"""Test trivial Keras model (input pipeline) with tf.data's
experimental_slack and 8 GPUs.
"""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_eager = True
FLAGS.enable_xla = True
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_slack')
FLAGS.batch_size = 256 * 8
FLAGS.tf_data_experimental_slack = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_graph_8_gpu(self): def benchmark_graph_8_gpu(self):
...@@ -1166,6 +1147,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark): ...@@ -1166,6 +1147,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_tweaked') FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_tweaked')
FLAGS.batch_size = 256 * 8 FLAGS.batch_size = 256 * 8
FLAGS.tf_gpu_thread_mode = 'gpu_private' FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.datasets_num_private_threads = 48
self._run_and_report_benchmark() self._run_and_report_benchmark()
def fill_report_object(self, stats): def fill_report_object(self, stats):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment