Add benchmarks for thread tuning. (#6283)

* Add benchmarks for thread tuning. * Address comment. * Add a comment.

Add benchmarks for thread tuning. (#6283)
* Add benchmarks for thread tuning. * Address comment. * Add a comment.
54dffe2e · Yuefeng Zhou · Toby Boyd · f0899f18 · 54dffe2e
Commit 54dffe2e authored Feb 28, 2019 by Yuefeng Zhou Committed by Toby Boyd Feb 28, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 13 additions and 17 deletions

official/resnet/keras/keras_imagenet_benchmark.py official/resnet/keras/keras_imagenet_benchmark.py +13 -17

No files found.
--- a/official/resnet/keras/keras_imagenet_benchmark.py
+++ b/official/resnet/keras/keras_imagenet_benchmark.py
@@ -74,25 +74,10 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
+    # Add some thread tunings to improve performance.
+    FLAGS.datasets_num_private_threads = 14
    self._run_and_report_benchmark()
-  def benchmark_8_gpu_bfc_allocator(self):
-    """Restricts CPU memory allocation."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.data_dir = self.data_dir
-    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_bfc_allocator')
-    FLAGS.dtype = 'fp32'
-    FLAGS.batch_size = 128 * 8  # 8 GPUs
-    FLAGS.enable_eager = True
-    # Limits CPU memory to work around memory spikes in eager mode.
-    # TODO(yuefengz): get rid of this test once we fix the memory issue.
-    os.environ['TF_CPU_ALLOCATOR_USE_BFC'] = 'true'
-    os.environ['TF_CPU_BFC_MEM_LIMIT_IN_MB'] = '100000'
-    self._run_and_report_benchmark()
-    del os.environ['TF_CPU_ALLOCATOR_USE_BFC']
-    del os.environ['TF_CPU_BFC_MEM_LIMIT_IN_MB']
  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = keras_imagenet_main.run(flags.FLAGS)
@@ -184,6 +169,17 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()
+  def benchmark_8_gpu_tweaked(self):
+    self._setup()
+    FLAGS.num_gpus = 8
+    FLAGS.enable_eager = True
+    FLAGS.distribution_strategy = 'default'
+    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
+    FLAGS.batch_size = 128 * 8  # 8 GPUs
+    FLAGS.datasets_num_private_threads = 14
+    self._run_and_report_benchmark()
  def benchmark_graph_8_gpu(self):
    self._setup()