Unverified commit 097c8051, authored by Toby Boyd, committed by GitHub

Add ResNet tests for NHWC and layout optimizer off. (#7018)

* layout off for some tests and channels last.

* 8 gpu tests channels_last

* more layout off tests.
parent 7b329985
@@ -156,6 +156,15 @@ def get_config_proto_v1():
    # OOM and performance regression.
    config.graph_options.rewrite_options.pin_to_host_optimization = (
        rewriter_config_pb2.RewriterConfig.OFF)
  # TODO(b/76028325): Remove when the generic layout optimizer is ready.
  if not FLAGS.enable_grappler_layout_optimizer:
    if config is None:
      config = tf.compat.v1.ConfigProto()
    # Disable the LayoutOptimizer in Grappler because it can de-optimize fp16
    # graphs by forcing the NCHW data format in all convolutions and batch
    # normalizations.
    config.graph_options.rewrite_options.layout_optimizer = (
        rewriter_config_pb2.RewriterConfig.OFF)
  return config
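For reference, the TF1-style override above can be exercised in isolation. A minimal sketch, assuming a tf.compat.v1 session workflow (the helper name make_session_config is hypothetical, not from this diff):

import tensorflow as tf
from tensorflow.core.protobuf import rewriter_config_pb2

def make_session_config():
  """Hypothetical helper: ConfigProto with the layout optimizer disabled."""
  config = tf.compat.v1.ConfigProto()
  # OFF prevents Grappler from rewriting convolutions and batch norms to NCHW.
  config.graph_options.rewrite_options.layout_optimizer = (
      rewriter_config_pb2.RewriterConfig.OFF)
  return config

sess = tf.compat.v1.Session(config=make_session_config())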
@@ -166,7 +175,15 @@ def set_config_v2():
    # Disable PinToHostOptimizer in grappler when enabling XLA because it
    # causes OOM and performance regression.
    tf.config.optimizer.set_experimental_options(
        {'pin_to_host_optimization': False}
    )
  # TODO(b/76028325): Remove when the generic layout optimizer is ready.
  if not FLAGS.enable_grappler_layout_optimizer:
    # Disable the LayoutOptimizer in Grappler because it can de-optimize fp16
    # graphs by forcing the NCHW data format in all convolutions and batch
    # normalizations.
    tf.config.optimizer.set_experimental_options(
        {'layout_optimizer': False}
    )
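The TF2 path is a process-wide setting. A minimal standalone sketch, assuming eager-mode TF2 (set_experimental_options merges with previously set options; get_experimental_options reports only options that were explicitly set):

import tensorflow as tf

# Turn the Grappler layout optimizer off for graphs traced after this call.
tf.config.optimizer.set_experimental_options({'layout_optimizer': False})

# Verify the override took effect.
print(tf.config.optimizer.get_experimental_options())
# {'layout_optimizer': False}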
@@ -326,7 +343,15 @@ def define_keras_flags():
  flags.DEFINE_boolean(
      name='enable_get_next_as_optional', default=False,
      help='Enable get_next_as_optional behavior in DistributedIterator.')
  # TODO(b/76028325): Remove when the generic layout optimizer is ready.
  flags.DEFINE_boolean(
      name='enable_grappler_layout_optimizer',
      default=True,
      help='Enable the Grappler layout optimizer. Grappler can currently '
           'de-optimize fp16 graphs by forcing NCHW layout for all '
           'convolutions and batch normalizations, so this flag allows '
           'disabling it.'
  )

def get_synth_input_fn(height, width, num_channels, num_classes,
                       dtype=tf.float32, drop_remainder=True):
...
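As a usage note, enable_grappler_layout_optimizer is an ordinary absl boolean flag, so runs outside the benchmark classes below can pass it on the command line together with --data_format. The entry-point script name here is illustrative, not taken from this diff:

  python keras_imagenet_main.py --num_gpus=1 --dtype=fp16 \
      --data_format=channels_last --enable_grappler_layout_optimizer=false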
@@ -19,7 +19,7 @@ import os
import time
from absl import flags
import tensorflow as tf  # pylint: disable=g-bad-import-order
from official.resnet import imagenet_main
from official.resnet.keras import keras_benchmark

@@ -292,6 +292,18 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_layout_off(self):
    """Test Keras model with 1 GPU and no layout optimizer."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_layout_off')
    FLAGS.batch_size = 128
    FLAGS.enable_grappler_layout_optimizer = False
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()

@@ -304,6 +316,18 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
  def benchmark_xla_1_gpu_layout_off(self):
    """Test Keras model with XLA, 1 GPU, and no layout optimizer."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_layout_off')
    FLAGS.batch_size = 128
    FLAGS.enable_grappler_layout_optimizer = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()
@@ -316,6 +340,20 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_layout_off(self):
    """Test Keras model with 1 GPU, fp16, and no layout optimizer."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_layout_off')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.enable_grappler_layout_optimizer = False
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()
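These layout-off tests pair the flag with data_format='channels_last', i.e. NHWC tensors end to end; without the override, Grappler would rewrite GPU convolutions to NCHW. A minimal illustrative sketch of the two layouts:

import tensorflow as tf

# channels_last / NHWC: (batch, height, width, channels).
nhwc = tf.zeros([128, 224, 224, 3])
# channels_first / NCHW: (batch, channels, height, width).
nchw = tf.transpose(nhwc, [0, 3, 1, 2])
print(nchw.shape)  # (128, 3, 224, 224)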
  def benchmark_1_gpu_fp16_dynamic(self):
    """Test Keras model with 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

@@ -342,6 +380,21 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()
  def benchmark_xla_1_gpu_fp16_layout_off(self):
    """Test Keras model with XLA, 1 GPU, fp16, and no layout optimizer."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_1_gpu_fp16_layout_off')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.enable_grappler_layout_optimizer = False
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()
  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
    self._setup()

@@ -358,8 +411,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    self._run_and_report_benchmark()
  def benchmark_xla_1_gpu_fp16_slack(self):
    """Test Keras model with XLA, 1 GPU, fp16, and experimental_slack."""
    self._setup()
    FLAGS.num_gpus = 1
@@ -435,9 +487,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    self._run_and_report_benchmark()
  def benchmark_graph_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph with 1 GPU, fp16, XLA, and tuning."""
    self._setup()
    FLAGS.num_gpus = 1

@@ -453,9 +503,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    self._run_and_report_benchmark()
  def benchmark_graph_xla_1_gpu_fp16_slack(self):
    """Test legacy graph with XLA, 1 GPU, fp16, and experimental_slack."""
    self._setup()
    FLAGS.num_gpus = 1

@@ -556,6 +604,20 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()
  def benchmark_8_gpu_fp16_layout_off(self):
    """Test Keras model with 8 GPUs, fp16, and layout off."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_layout_off')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.enable_grappler_layout_optimizer = False
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_cloning(self):
    """Test Keras model with 8 GPUs, fp16 and cloning."""
    self._setup()
@@ -583,6 +645,24 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_tweaked_layout_off(self):
    """Test Keras model with 8 GPUs, fp16, tuning, and layout off."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_fp16_tweaked_layout_off')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    FLAGS.enable_grappler_layout_optimizer = False
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()
  def benchmark_8_gpu_fp16_cloning_tweaked(self):
    """Test Keras model with 8 GPUs, fp16, cloning, and manual config tuning."""
    self._setup()

@@ -647,6 +727,21 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()
  def benchmark_xla_8_gpu_fp16_layout_off(self):
    """Test Keras model with XLA, 8 GPUs, fp16, and layout off."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_layout_off')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.enable_grappler_layout_optimizer = False
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_cloning(self):
    """Test Keras model with XLA, 8 GPUs, fp16 and cloning."""
    self._setup()
@@ -695,6 +790,24 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_cloning_tweaked_layout_off(self):
    """Test with tuning, XLA, 8 GPUs, fp16, cloning, and layout off."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_cloning_tweaked_layout_off')
    FLAGS.batch_size = 256 * 8
    FLAGS.use_tensor_lr = True
    FLAGS.enable_grappler_layout_optimizer = False
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()
  def benchmark_xla_8_gpu_fp16_cloning_tweaked_optional_next(self):
    """Test with manual config tuning, XLA, 8 GPUs, fp16, and cloning.

@@ -882,9 +995,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    self._run_and_report_benchmark()
  def benchmark_graph_8_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph mode with tuning, 8 GPUs, and fp16."""
    self._setup()
    FLAGS.num_gpus = 8

@@ -898,9 +1009,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    self._run_and_report_benchmark()
  def benchmark_graph_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph with tuning, XLA, 8 GPUs, and fp16."""
    self._setup()
    FLAGS.num_gpus = 8

@@ -956,9 +1065,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    self._run_and_report_benchmark()
  def benchmark_graph_xla_8_gpu_fp16_slack(self):
    """Test legacy graph with XLA, 8 GPUs, fp16, and experimental_slack."""
    self._setup()
    FLAGS.num_gpus = 8

@@ -1122,9 +1229,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
    self._run_and_report_benchmark()
  def benchmark_8_gpu_tweaked(self):
    """Test trivial Keras model with tuning and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8

@@ -1137,9 +1242,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
    self._run_and_report_benchmark()
  def benchmark_graph_8_gpu(self):
    """Test trivial Keras model in legacy graph mode with 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8

@@ -1150,9 +1253,7 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
    self._run_and_report_benchmark()
  def benchmark_graph_8_gpu_tweaked(self):
    """Test trivial Keras model in legacy graph mode with tuning and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
...