Commit 9e9534e8 authored by Hongkun Yu, committed by A. Unique TensorFlower

Deprecate the graph rewrite path for fp16. It is not a TF2 API and has no remaining usage.

PiperOrigin-RevId: 410629444
parent a8dd50cd
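
For context, the removed `fp16_implementation='graph_rewrite'` flag value selected the TF1-style automatic mixed precision graph rewrite. The TF2 path these benchmarks keep is the Keras mixed precision API. The snippet below is only an illustrative sketch (the model, optimizer, and hyperparameters are placeholders, not taken from this change) of what that remaining path looks like, roughly mirroring what `performance.configure_optimizer(..., use_float16=True)` does once the graph-rewrite branch is gone.

# Illustrative TF2 replacement for the deprecated graph-rewrite AMP path.
# Model shape, optimizer, and learning rate below are placeholders.
import tensorflow as tf

# Compute in float16 while keeping variables in float32.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu'),
    # Keep the final layer in float32 for numerically stable outputs.
    tf.keras.layers.Dense(10, dtype='float32'),
])

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
# Dynamic loss scaling replaces the fixed loss_scale passed to the graph rewrite.
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)

model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])
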
...@@ -220,44 +220,6 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)
def benchmark_1_gpu_amp_mrpc_no_dist_strat(self):
"""Performance for 1 GPU no DS with automatic mixed precision."""
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_amp_mrpc_no_dist_strat')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 4
FLAGS.eval_batch_size = 4
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
def benchmark_8_gpu_amp_mrpc(self):
"""Test BERT model performance with 8 GPUs with automatic mixed precision."""
self._setup()
self.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_mrpc')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 32
FLAGS.eval_batch_size = 32
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Test BERT model performance with 2x2 TPU."""
...
...@@ -319,31 +319,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
    self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
"""Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp_squad')
FLAGS.train_batch_size = 4
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
"""Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
self._setup()
self.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_squad')
FLAGS.train_batch_size = 32
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.tf_gpu_thread_mode = 'gpu_private'
self._run_and_report_benchmark()
  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu(self):
    """Tests BERT SQuAD model performance with 2x2 TPU."""
...
...@@ -819,19 +819,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
"""Test Keras model with 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
FLAGS.batch_size = 256
self._run_and_report_benchmark()
  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()
...@@ -844,20 +831,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
def benchmark_xla_1_gpu_amp(self):
"""Test Keras model with XLA and 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
FLAGS.batch_size = 256
self._run_and_report_benchmark()
  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()
...@@ -946,19 +919,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
"""Test Keras model with 8 GPUs with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
FLAGS.batch_size = 256 * 8 # 8 GPUs
self._run_and_report_benchmark()
  def benchmark_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning and 8 GPUs."""
    self._setup()
...@@ -983,20 +943,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()
def benchmark_xla_8_gpu_amp(self):
"""Test Keras model with XLA and 8 GPUs with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp')
FLAGS.batch_size = 256 * 8 # 8 GPUs
self._run_and_report_benchmark()
  def benchmark_xla_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning, 8 GPUs, and XLA."""
    self._setup()
...@@ -1315,20 +1261,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
"""Test Keras model with 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
FLAGS.batch_size = 256
self._override_flags_to_run_test_shorter()
self._run_and_report_benchmark()
  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()
...@@ -1342,21 +1274,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()
def benchmark_xla_1_gpu_amp(self):
"""Test Keras model with XLA and 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
FLAGS.batch_size = 256
self._override_flags_to_run_test_shorter()
self._run_and_report_benchmark()
  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()
...
...@@ -14,10 +14,6 @@
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os

# Import libraries
...@@ -197,7 +193,6 @@ def run(flags_obj):
  optimizer = performance.configure_optimizer(
      optimizer,
      use_float16=flags_core.get_tf_dtype(flags_obj) == tf.float16,
      use_graph_rewrite=flags_obj.fp16_implementation == 'graph_rewrite',
      loss_scale=flags_core.get_loss_scale(flags_obj, default_for_fp16=128),)
  # TODO(hongkuny): Remove trivial model usage and move it to benchmark.
...@@ -243,8 +238,7 @@ def run(flags_obj):
  if flags_obj.clustering_method == 'selective_clustering':
    import tensorflow_model_optimization as tfmot  # pylint: disable=g-import-not-at-top
    if dtype != tf.float32 or \
        flags_obj.fp16_implementation == 'graph_rewrite':
    if dtype != tf.float32:
      raise NotImplementedError(
          'Clustering is currently only supported on dtype=tf.float32.')
    model = _cluster_last_three_conv2d_layers(model)
...
...@@ -273,25 +273,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_fp16_graph_rewrite_mlperf_like(self):
"""1 GPU using CTL and FP16 graph rewrite."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_fp16_graph_rewrite_mlperf_like(self):
"""1 GPU using FP16 graph rewrite."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
  def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
    """1 GPU using CTL with eager and distribution strategy."""
    self._setup()
...@@ -378,16 +359,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_tf_data_ctl_fp16_graph_rewrite_mlperf_like(self):
"""8 GPU FP16 graph rewrite using CTL."""
self._setup()
self._set_8_gpu_defaults()
FLAGS.keras_use_ctl = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
class NCFKerasBenchmarkReal(NCFKerasBenchmarkBase):
  """NCF Keras throughput benchmarks."""
...
...@@ -163,19 +163,6 @@ class Resnet50CtlAccuracy(CtlBenchmark):
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
"""Test Keras model with 8 GPUs and mixed precision via graph rewrite."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
self._run_and_report_benchmark()
  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
...@@ -251,31 +238,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
"""Test Keras model with 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_amp(self):
"""Test Keras model with XLA and 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
self._run_and_report_benchmark()
  def benchmark_1_gpu_eager(self):
    """Test Keras model with 1 GPU in pure eager mode."""
    self._setup()
...@@ -371,31 +333,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
"""Test Keras model with 8 GPUs with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_amp(self):
"""Test Keras model with XLA and 8 GPUs with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
self._run_and_report_benchmark()
  def _set_df_common(self):
    FLAGS.steps_per_loop = 500
    FLAGS.train_epochs = 2
...
...@@ -345,26 +345,6 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
                                   bleu_min=28,
                                   bleu_max=29.2)
def benchmark_8_gpu_fp16_amp(self):
"""Benchmark 8 gpu with dynamic batch and fp16 with automatic mixed precision.
Should converge to 28.4 BLEU (uncased). This has not be verified yet."
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.param_set = 'big'
FLAGS.batch_size = 3072*8
FLAGS.train_steps = 20000 * 12
FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_amp')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=28,
bleu_max=29)
  def benchmark_8_gpu_static_batch_fp16(self):
    """Benchmark 8 gpu with static batch and fp16.
...