Commit 684bddcd authored by Hongkun Yu, committed by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 306136273
parent 7b8024c2
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Executes Keras benchmarks and accuracy tests.""" """Executes Keras benchmarks and accuracy tests."""
# pylint: disable=line-too-long
from __future__ import print_function from __future__ import print_function
import json import json
...@@ -144,7 +145,6 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark): ...@@ -144,7 +145,6 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
dataset_num_private_threads: Optional[int] = None, dataset_num_private_threads: Optional[int] = None,
loss_scale: Optional[str] = None): loss_scale: Optional[str] = None):
"""Runs and reports the benchmark given the provided configuration.""" """Runs and reports the benchmark given the provided configuration."""
self._setup()
FLAGS.model_type = 'resnet' FLAGS.model_type = 'resnet'
FLAGS.dataset = 'imagenet' FLAGS.dataset = 'imagenet'
FLAGS.mode = 'train_and_eval' FLAGS.mode = 'train_and_eval'
...@@ -331,7 +331,6 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -331,7 +331,6 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
dataset_num_private_threads: Optional[int] = None, dataset_num_private_threads: Optional[int] = None,
loss_scale: Optional[str] = None): loss_scale: Optional[str] = None):
"""Runs and reports the benchmark given the provided configuration.""" """Runs and reports the benchmark given the provided configuration."""
self._setup()
FLAGS.model_type = 'resnet' FLAGS.model_type = 'resnet'
FLAGS.dataset = 'imagenet' FLAGS.dataset = 'imagenet'
FLAGS.mode = 'train_and_eval' FLAGS.mode = 'train_and_eval'
...@@ -372,6 +371,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -372,6 +371,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_1_gpu_no_dist_strat(self): def benchmark_1_gpu_no_dist_strat(self):
"""Tests Keras model with 1 GPU, no distribution strategy.""" """Tests Keras model with 1 GPU, no distribution strategy."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_1_gpu_no_dist_strat', experiment_name='benchmark_1_gpu_no_dist_strat',
num_gpus=1, num_gpus=1,
...@@ -380,6 +380,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -380,6 +380,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_1_gpu_no_dist_strat_run_eagerly(self): def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
"""Tests Keras model with 1 GPU, no distribution strategy, run eagerly.""" """Tests Keras model with 1 GPU, no distribution strategy, run eagerly."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly', experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly',
num_gpus=1, num_gpus=1,
...@@ -389,6 +390,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -389,6 +390,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self): def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
"""Tests with 1 GPU, no distribution strategy, fp16, run eagerly.""" """Tests with 1 GPU, no distribution strategy, fp16, run eagerly."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly_fp16', experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly_fp16',
num_gpus=1, num_gpus=1,
...@@ -399,6 +401,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -399,6 +401,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_1_gpu(self): def benchmark_1_gpu(self):
"""Tests Keras model with 1 GPU.""" """Tests Keras model with 1 GPU."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_1_gpu', experiment_name='benchmark_1_gpu',
num_gpus=1, num_gpus=1,
...@@ -407,16 +410,17 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -407,16 +410,17 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_xla_1_gpu(self): def benchmark_xla_1_gpu(self):
"""Tests Keras model with XLA and 1 GPU.""" """Tests Keras model with XLA and 1 GPU."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_1_gpu', experiment_name='benchmark_xla_1_gpu',
num_gpus=1, num_gpus=1,
enable_xla=True, enable_xla=True,
distribution_strategy='one_device', distribution_strategy='one_device',
per_replica_batch_size=128) per_replica_batch_size=128)
self._setup()
def benchmark_1_gpu_fp16(self): def benchmark_1_gpu_fp16(self):
"""Tests Keras model with 1 GPU and fp16.""" """Tests Keras model with 1 GPU and fp16."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_1_gpu_fp16', experiment_name='benchmark_1_gpu_fp16',
num_gpus=1, num_gpus=1,
...@@ -426,6 +430,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -426,6 +430,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_1_gpu_fp16_dynamic(self): def benchmark_1_gpu_fp16_dynamic(self):
"""Tests Keras model with 1 GPU, fp16, and dynamic loss scaling.""" """Tests Keras model with 1 GPU, fp16, and dynamic loss scaling."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_1_gpu_fp16_dynamic', experiment_name='benchmark_1_gpu_fp16_dynamic',
num_gpus=1, num_gpus=1,
...@@ -436,6 +441,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -436,6 +441,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_xla_1_gpu_fp16(self): def benchmark_xla_1_gpu_fp16(self):
"""Tests Keras model with XLA, 1 GPU and fp16.""" """Tests Keras model with XLA, 1 GPU and fp16."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_1_gpu_fp16', experiment_name='benchmark_xla_1_gpu_fp16',
num_gpus=1, num_gpus=1,
...@@ -446,6 +452,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -446,6 +452,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_xla_1_gpu_fp16_tweaked(self): def benchmark_xla_1_gpu_fp16_tweaked(self):
"""Tests Keras model with XLA, 1 GPU, fp16, and manual config tuning.""" """Tests Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_1_gpu_fp16_tweaked', experiment_name='benchmark_xla_1_gpu_fp16_tweaked',
num_gpus=1, num_gpus=1,
...@@ -457,6 +464,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -457,6 +464,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_xla_1_gpu_fp16_dynamic(self): def benchmark_xla_1_gpu_fp16_dynamic(self):
"""Tests Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling.""" """Tests Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_1_gpu_fp16_dynamic', experiment_name='benchmark_xla_1_gpu_fp16_dynamic',
num_gpus=1, num_gpus=1,
...@@ -468,6 +476,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -468,6 +476,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_graph_1_gpu(self): def benchmark_graph_1_gpu(self):
"""Tests Keras model in legacy graph mode with 1 GPU.""" """Tests Keras model in legacy graph mode with 1 GPU."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_graph_1_gpu', experiment_name='benchmark_graph_1_gpu',
num_gpus=1, num_gpus=1,
...@@ -476,6 +485,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -476,6 +485,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_graph_xla_1_gpu(self): def benchmark_graph_xla_1_gpu(self):
"""Tests Keras model in legacy graph mode with XLA and 1 GPU.""" """Tests Keras model in legacy graph mode with XLA and 1 GPU."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_graph_xla_1_gpu', experiment_name='benchmark_graph_xla_1_gpu',
num_gpus=1, num_gpus=1,
...@@ -485,6 +495,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -485,6 +495,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_8_gpu(self): def benchmark_8_gpu(self):
"""Tests Keras model with 8 GPUs.""" """Tests Keras model with 8 GPUs."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_8_gpu', experiment_name='benchmark_8_gpu',
num_gpus=8, num_gpus=8,
...@@ -493,6 +504,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -493,6 +504,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_8_gpu_tweaked(self): def benchmark_8_gpu_tweaked(self):
"""Tests Keras model with manual config tuning and 8 GPUs.""" """Tests Keras model with manual config tuning and 8 GPUs."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_8_gpu_tweaked', experiment_name='benchmark_8_gpu_tweaked',
num_gpus=8, num_gpus=8,
...@@ -502,6 +514,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -502,6 +514,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_xla_8_gpu(self): def benchmark_xla_8_gpu(self):
"""Tests Keras model with XLA and 8 GPUs.""" """Tests Keras model with XLA and 8 GPUs."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_8_gpu', experiment_name='benchmark_xla_8_gpu',
num_gpus=8, num_gpus=8,
...@@ -511,6 +524,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -511,6 +524,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_xla_8_gpu_tweaked(self): def benchmark_xla_8_gpu_tweaked(self):
"""Tests Keras model with manual config tuning, 8 GPUs, and XLA.""" """Tests Keras model with manual config tuning, 8 GPUs, and XLA."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_8_gpu_tweaked', experiment_name='benchmark_xla_8_gpu_tweaked',
num_gpus=8, num_gpus=8,
...@@ -522,6 +536,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -522,6 +536,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_8_gpu_fp16(self): def benchmark_8_gpu_fp16(self):
"""Tests Keras model with 8 GPUs and fp16.""" """Tests Keras model with 8 GPUs and fp16."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_8_gpu_fp16', experiment_name='benchmark_8_gpu_fp16',
num_gpus=8, num_gpus=8,
...@@ -531,6 +546,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -531,6 +546,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_8_gpu_fp16_tweaked(self): def benchmark_8_gpu_fp16_tweaked(self):
"""Tests Keras model with 8 GPUs, fp16, and manual config tuning.""" """Tests Keras model with 8 GPUs, fp16, and manual config tuning."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_8_gpu_fp16_tweaked', experiment_name='benchmark_8_gpu_fp16_tweaked',
num_gpus=8, num_gpus=8,
...@@ -541,6 +557,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -541,6 +557,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_8_gpu_fp16_dynamic_tweaked(self): def benchmark_8_gpu_fp16_dynamic_tweaked(self):
"""Tests Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned.""" """Tests Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_8_gpu_fp16_dynamic_tweaked', experiment_name='benchmark_8_gpu_fp16_dynamic_tweaked',
num_gpus=8, num_gpus=8,
...@@ -552,6 +569,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -552,6 +569,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_xla_8_gpu_fp16(self): def benchmark_xla_8_gpu_fp16(self):
"""Tests Keras model with XLA, 8 GPUs and fp16.""" """Tests Keras model with XLA, 8 GPUs and fp16."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_8_gpu_fp16', experiment_name='benchmark_xla_8_gpu_fp16',
dtype='float16', dtype='float16',
...@@ -562,6 +580,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -562,6 +580,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_xla_8_gpu_fp16_tweaked(self): def benchmark_xla_8_gpu_fp16_tweaked(self):
"""Test Keras model with manual config tuning, XLA, 8 GPUs and fp16.""" """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_8_gpu_fp16_tweaked', experiment_name='benchmark_xla_8_gpu_fp16_tweaked',
dtype='float16', dtype='float16',
...@@ -577,6 +596,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -577,6 +596,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
Delay performance measurement for stable performance on 96 vCPU platforms. Delay performance measurement for stable performance on 96 vCPU platforms.
""" """
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_8_gpu_fp16_tweaked_delay_measure', experiment_name='benchmark_xla_8_gpu_fp16_tweaked_delay_measure',
dtype='float16', dtype='float16',
...@@ -589,6 +609,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -589,6 +609,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self): def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self):
"""Tests Keras model with config tuning, XLA, 8 GPUs and dynamic fp16.""" """Tests Keras model with config tuning, XLA, 8 GPUs and dynamic fp16."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_xla_8_gpu_fp16_dynamic_tweaked', experiment_name='benchmark_xla_8_gpu_fp16_dynamic_tweaked',
dtype='float16', dtype='float16',
...@@ -602,6 +623,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -602,6 +623,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_graph_8_gpu(self): def benchmark_graph_8_gpu(self):
"""Tests Keras model in legacy graph mode with 8 GPUs.""" """Tests Keras model in legacy graph mode with 8 GPUs."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_graph_8_gpu', experiment_name='benchmark_graph_8_gpu',
num_gpus=8, num_gpus=8,
...@@ -610,6 +632,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -610,6 +632,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_graph_xla_8_gpu(self): def benchmark_graph_xla_8_gpu(self):
"""Tests Keras model in legacy graph mode with XLA and 8 GPUs.""" """Tests Keras model in legacy graph mode with XLA and 8 GPUs."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_graph_xla_8_gpu', experiment_name='benchmark_graph_xla_8_gpu',
num_gpus=8, num_gpus=8,
...@@ -619,6 +642,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -619,6 +642,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_2x2_tpu_fp16(self): def benchmark_2x2_tpu_fp16(self):
"""Test Keras model with 2x2 TPU, fp16.""" """Test Keras model with 2x2 TPU, fp16."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_2x2_tpu_fp16', experiment_name='benchmark_2x2_tpu_fp16',
dtype='bfloat16', dtype='bfloat16',
...@@ -627,6 +651,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -627,6 +651,7 @@ class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
def benchmark_4x4_tpu_fp16(self): def benchmark_4x4_tpu_fp16(self):
"""Test Keras model with 4x4 TPU, fp16.""" """Test Keras model with 4x4 TPU, fp16."""
self._setup()
self._run_and_report_benchmark( self._run_and_report_benchmark(
experiment_name='benchmark_4x4_tpu_fp16', experiment_name='benchmark_4x4_tpu_fp16',
dtype='bfloat16', dtype='bfloat16',
...@@ -1277,8 +1302,7 @@ class Resnet50KerasBenchmarkReal(Resnet50KerasClassifierBenchmarkBase): ...@@ -1277,8 +1302,7 @@ class Resnet50KerasBenchmarkReal(Resnet50KerasClassifierBenchmarkBase):
"""Resnet50 real data benchmark tests.""" """Resnet50 real data benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs): def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs):
data_dir = ('/readahead/200M/placer/prod/home/distbelief/' data_dir = os.path.join(root_data_dir, 'imagenet')
'imagenet-tensorflow/imagenet-2012-tfrecord')
def_flags = {} def_flags = {}
def_flags['log_steps'] = 10 def_flags['log_steps'] = 10
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Executes CTL benchmarks and accuracy tests.""" """Executes CTL benchmarks and accuracy tests."""
# pylint: disable=line-too-long,g-bad-import-order
from __future__ import print_function from __future__ import print_function
import os import os
import time import time
# pylint: disable=g-bad-import-order
from absl import flags from absl import flags
import tensorflow as tf import tensorflow as tf
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment