Commit 9e9534e8 authored by Hongkun Yu, committed by A. Unique TensorFlower

Deprecate the graph rewrite path for fp16. It is not a TF2 API and has no remaining usage.

PiperOrigin-RevId: 410629444
parent a8dd50cd
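
For context, the removed `fp16_implementation='graph_rewrite'` flag value selected the TF1-style automatic mixed precision graph rewrite. The TF2 path these benchmarks keep is the Keras mixed precision API. The snippet below is only an illustrative sketch (the model, optimizer, and hyperparameters are placeholders, not taken from this change) of what that remaining path looks like, roughly mirroring what `performance.configure_optimizer(..., use_float16=True)` does once the graph-rewrite branch is gone.

# Illustrative TF2 replacement for the deprecated graph-rewrite AMP path.
# Model shape, optimizer, and learning rate below are placeholders.
import tensorflow as tf

# Compute in float16 while keeping variables in float32.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu'),
    # Keep the final layer in float32 for numerically stable outputs.
    tf.keras.layers.Dense(10, dtype='float32'),
])

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
# Dynamic loss scaling replaces the fixed loss_scale passed to the graph rewrite.
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)

model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])
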
...@@ -220,44 +220,6 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)
def benchmark_1_gpu_amp_mrpc_no_dist_strat(self):
"""Performance for 1 GPU no DS with automatic mixed precision."""
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_amp_mrpc_no_dist_strat')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 4
FLAGS.eval_batch_size = 4
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
def benchmark_8_gpu_amp_mrpc(self):
"""Test BERT model performance with 8 GPUs with automatic mixed precision."""
self._setup()
self.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_mrpc')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.train_batch_size = 32
FLAGS.eval_batch_size = 32
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Test BERT model performance with 2x2 TPU."""
...
...@@ -319,31 +319,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
    self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
"""Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
self._setup()
self.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp_squad')
FLAGS.train_batch_size = 4
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
"""Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
self._setup()
self.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_squad')
FLAGS.train_batch_size = 32
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.tf_gpu_thread_mode = 'gpu_private'
self._run_and_report_benchmark()
  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu(self):
    """Tests BERT SQuAD model performance with 2x2 TPU."""
...
...@@ -819,19 +819,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
"""Test Keras model with 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
FLAGS.batch_size = 256
self._run_and_report_benchmark()
  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()
...@@ -844,20 +831,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
def benchmark_xla_1_gpu_amp(self):
"""Test Keras model with XLA and 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
FLAGS.batch_size = 256
self._run_and_report_benchmark()
  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()
...@@ -946,19 +919,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
"""Test Keras model with 8 GPUs with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
FLAGS.batch_size = 256 * 8 # 8 GPUs
self._run_and_report_benchmark()
  def benchmark_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning and 8 GPUs."""
    self._setup()
...@@ -983,20 +943,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()
def benchmark_xla_8_gpu_amp(self):
"""Test Keras model with XLA and 8 GPUs with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp')
FLAGS.batch_size = 256 * 8 # 8 GPUs
self._run_and_report_benchmark()
  def benchmark_xla_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning, 8 GPUs, and XLA."""
    self._setup()
...@@ -1315,20 +1261,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
"""Test Keras model with 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
FLAGS.batch_size = 256
self._override_flags_to_run_test_shorter()
self._run_and_report_benchmark()
  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()
...@@ -1342,21 +1274,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()
def benchmark_xla_1_gpu_amp(self):
"""Test Keras model with XLA and 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.enable_eager = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
FLAGS.batch_size = 256
self._override_flags_to_run_test_shorter()
self._run_and_report_benchmark()
  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()
...
...@@ -14,10 +14,6 @@
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os

# Import libraries
...@@ -197,7 +193,6 @@ def run(flags_obj):
  optimizer = performance.configure_optimizer(
      optimizer,
      use_float16=flags_core.get_tf_dtype(flags_obj) == tf.float16,
      use_graph_rewrite=flags_obj.fp16_implementation == 'graph_rewrite',
      loss_scale=flags_core.get_loss_scale(flags_obj, default_for_fp16=128),)
  # TODO(hongkuny): Remove trivial model usage and move it to benchmark.
...@@ -243,8 +238,7 @@ def run(flags_obj):
  if flags_obj.clustering_method == 'selective_clustering':
    import tensorflow_model_optimization as tfmot  # pylint: disable=g-import-not-at-top
    if dtype != tf.float32 or \
        flags_obj.fp16_implementation == 'graph_rewrite':
    if dtype != tf.float32:
      raise NotImplementedError(
          'Clustering is currently only supported on dtype=tf.float32.')
    model = _cluster_last_three_conv2d_layers(model)
...
...@@ -273,25 +273,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_fp16_graph_rewrite_mlperf_like(self):
"""1 GPU using CTL and FP16 graph rewrite."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_fp16_graph_rewrite_mlperf_like(self):
"""1 GPU using FP16 graph rewrite."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
  def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
    """1 GPU using CTL with eager and distribution strategy."""
    self._setup()
...@@ -378,16 +359,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_tf_data_ctl_fp16_graph_rewrite_mlperf_like(self):
"""8 GPU FP16 graph rewrite using CTL."""
self._setup()
self._set_8_gpu_defaults()
FLAGS.keras_use_ctl = True
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
class NCFKerasBenchmarkReal(NCFKerasBenchmarkBase):
  """NCF Keras throughput benchmarks."""
...
...@@ -163,19 +163,6 @@ class Resnet50CtlAccuracy(CtlBenchmark):
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
"""Test Keras model with 8 GPUs and mixed precision via graph rewrite."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
self._run_and_report_benchmark()
  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
...@@ -251,31 +238,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
"""Test Keras model with 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_amp(self):
"""Test Keras model with XLA and 1 GPU with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
self._run_and_report_benchmark()
  def benchmark_1_gpu_eager(self):
    """Test Keras model with 1 GPU in pure eager mode."""
    self._setup()
...@@ -371,31 +333,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
"""Test Keras model with 8 GPUs with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_amp(self):
"""Test Keras model with XLA and 8 GPUs with automatic mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.enable_xla = True
self._run_and_report_benchmark()
  def _set_df_common(self):
    FLAGS.steps_per_loop = 500
    FLAGS.train_epochs = 2
...
...@@ -345,26 +345,6 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
                                   bleu_min=28,
                                   bleu_max=29.2)
def benchmark_8_gpu_fp16_amp(self):
"""Benchmark 8 gpu with dynamic batch and fp16 with automatic mixed precision.
Should converge to 28.4 BLEU (uncased). This has not be verified yet."
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.param_set = 'big'
FLAGS.batch_size = 3072*8
FLAGS.train_steps = 20000 * 12
FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_amp')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=28,
bleu_max=29)
  def benchmark_8_gpu_static_batch_fp16(self):
    """Benchmark 8 gpu with static batch and fp16.
...