"docs/backend/hyperparameter_tuning.md" did not exist on "29ebe3dff475b87f9e252fa9257ab9b64ee4988f"
Commit 9e9534e8 authored by Hongkun Yu's avatar Hongkun Yu Committed by A. Unique TensorFlower
Browse files

Deprecate the graph rewrite path for fp16. This is no longer a TF2 api and there is no usage.

PiperOrigin-RevId: 410629444
parent a8dd50cd
...@@ -220,44 +220,6 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase): ...@@ -220,44 +220,6 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
'summaries/training_summary.txt') 'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path) self._run_and_report_benchmark(summary_path)
def benchmark_1_gpu_amp_mrpc_no_dist_strat(self):
  """Benchmarks BERT MRPC on one GPU without a distribution strategy.

  Mixed precision is enabled through the (deprecated) fp16 graph-rewrite
  implementation rather than the Keras mixed-precision policy.
  """
  self._setup()
  self.num_gpus = 1
  # Shared benchmark data and model config.
  FLAGS.bert_config_file = self.bert_config_file
  FLAGS.input_meta_data_path = self.input_meta_data_path
  FLAGS.train_data_path = self.train_data_path
  FLAGS.eval_data_path = self.eval_data_path
  # Small batches for the single-GPU configuration.
  FLAGS.train_batch_size = 4
  FLAGS.eval_batch_size = 4
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.model_dir = self._get_model_dir(
      'benchmark_1_gpu_amp_mrpc_no_dist_strat')
  training_summary = os.path.join(FLAGS.model_dir,
                                  'summaries/training_summary.txt')
  self._run_and_report_benchmark(training_summary, use_ds=False)
def benchmark_8_gpu_amp_mrpc(self):
  """Benchmarks BERT MRPC on 8 GPUs with graph-rewrite mixed precision."""
  self._setup()
  self.num_gpus = 8
  # Shared benchmark data and model config.
  FLAGS.bert_config_file = self.bert_config_file
  FLAGS.input_meta_data_path = self.input_meta_data_path
  FLAGS.train_data_path = self.train_data_path
  FLAGS.eval_data_path = self.eval_data_path
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.train_batch_size = 32
  FLAGS.eval_batch_size = 32
  FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_mrpc')
  training_summary = os.path.join(FLAGS.model_dir,
                                  'summaries/training_summary.txt')
  # NOTE(review): use_ds=False despite 8 GPUs — matches original behavior.
  self._run_and_report_benchmark(training_summary, use_ds=False)
@owner_utils.Owner('tf-model-garden') @owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu_mrpc(self): def benchmark_2x2_tpu_mrpc(self):
"""Test BERT model performance with 2x2 TPU.""" """Test BERT model performance with 2x2 TPU."""
......
...@@ -319,31 +319,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase): ...@@ -319,31 +319,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
  """Benchmarks BERT SQuAD on one GPU with graph-rewrite mixed precision."""
  self._setup()
  self.num_gpus = 1
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.train_batch_size = 4
  FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp_squad')
  self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
  """Tests BERT SQuAD model performance with 8 GPUs with automatic mixed precision."""
  self._setup()
  self.num_gpus = 8
  FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_squad')
  FLAGS.train_batch_size = 32
  FLAGS.dtype = 'fp16'
  # Deprecated graph-rewrite AMP path (vs. the Keras mixed-precision policy).
  FLAGS.fp16_implementation = 'graph_rewrite'
  # presumably gives each GPU a private thread pool — verify flag definition.
  FLAGS.tf_gpu_thread_mode = 'gpu_private'
  self._run_and_report_benchmark()
@owner_utils.Owner('tf-model-garden') @owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu(self): def benchmark_2x2_tpu(self):
"""Tests BERT SQuAD model performance with 2x2 TPU.""" """Tests BERT SQuAD model performance with 2x2 TPU."""
......
...@@ -819,19 +819,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -819,19 +819,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 FLAGS.batch_size = 128
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
  """Benchmarks the Keras model on one GPU with graph-rewrite AMP."""
  self._setup()
  FLAGS.enable_eager = True
  FLAGS.num_gpus = 1
  FLAGS.distribution_strategy = 'one_device'
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256
  FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
  self._run_and_report_benchmark()
def benchmark_xla_1_gpu(self): def benchmark_xla_1_gpu(self):
"""Test Keras model with XLA and 1 GPU.""" """Test Keras model with XLA and 1 GPU."""
self._setup() self._setup()
...@@ -844,20 +831,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -844,20 +831,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 FLAGS.batch_size = 128
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_xla_1_gpu_amp(self):
  """Benchmarks the Keras model with XLA on one GPU with graph-rewrite AMP."""
  self._setup()
  FLAGS.enable_eager = True
  FLAGS.enable_xla = True
  FLAGS.num_gpus = 1
  FLAGS.distribution_strategy = 'one_device'
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256
  FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
  self._run_and_report_benchmark()
def benchmark_1_gpu_fp16(self): def benchmark_1_gpu_fp16(self):
"""Test Keras model with 1 GPU and fp16.""" """Test Keras model with 1 GPU and fp16."""
self._setup() self._setup()
...@@ -946,19 +919,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -946,19 +919,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 * 8 # 8 GPUs FLAGS.batch_size = 128 * 8 # 8 GPUs
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
  """Benchmarks the Keras model on 8 mirrored GPUs with graph-rewrite AMP."""
  self._setup()
  FLAGS.enable_eager = True
  FLAGS.num_gpus = 8
  FLAGS.distribution_strategy = 'mirrored'
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256 * 8  # 8 GPUs
  FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
  self._run_and_report_benchmark()
def benchmark_8_gpu_tweaked(self): def benchmark_8_gpu_tweaked(self):
"""Test Keras model with manual config tuning and 8 GPUs.""" """Test Keras model with manual config tuning and 8 GPUs."""
self._setup() self._setup()
...@@ -983,20 +943,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark): ...@@ -983,20 +943,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.batch_size = 128 * 8 # 8 GPUs FLAGS.batch_size = 128 * 8 # 8 GPUs
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_xla_8_gpu_amp(self):
  """Benchmarks the Keras model with XLA on 8 GPUs with graph-rewrite AMP."""
  self._setup()
  FLAGS.enable_eager = True
  FLAGS.enable_xla = True
  FLAGS.num_gpus = 8
  FLAGS.distribution_strategy = 'mirrored'
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256 * 8  # 8 GPUs
  FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp')
  self._run_and_report_benchmark()
def benchmark_xla_8_gpu_tweaked(self): def benchmark_xla_8_gpu_tweaked(self):
"""Test Keras model with manual config tuning, 8 GPUs, and XLA.""" """Test Keras model with manual config tuning, 8 GPUs, and XLA."""
self._setup() self._setup()
...@@ -1315,20 +1261,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase): ...@@ -1315,20 +1261,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
self._override_flags_to_run_test_shorter() self._override_flags_to_run_test_shorter()
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
  """Benchmarks the Keras model (remote data) on 1 GPU with graph-rewrite AMP."""
  self._setup()
  FLAGS.enable_eager = True
  FLAGS.num_gpus = 1
  FLAGS.distribution_strategy = 'one_device'
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256
  FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
  # Remote-data runs are shortened so the benchmark finishes quickly.
  self._override_flags_to_run_test_shorter()
  self._run_and_report_benchmark()
def benchmark_xla_1_gpu(self): def benchmark_xla_1_gpu(self):
"""Test Keras model with XLA and 1 GPU.""" """Test Keras model with XLA and 1 GPU."""
self._setup() self._setup()
...@@ -1342,21 +1274,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase): ...@@ -1342,21 +1274,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
self._override_flags_to_run_test_shorter() self._override_flags_to_run_test_shorter()
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_xla_1_gpu_amp(self):
  """Benchmarks the Keras model (remote data) with XLA, 1 GPU, graph-rewrite AMP."""
  self._setup()
  FLAGS.enable_eager = True
  FLAGS.enable_xla = True
  FLAGS.num_gpus = 1
  FLAGS.distribution_strategy = 'one_device'
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256
  FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
  # Remote-data runs are shortened so the benchmark finishes quickly.
  self._override_flags_to_run_test_shorter()
  self._run_and_report_benchmark()
def benchmark_1_gpu_fp16(self): def benchmark_1_gpu_fp16(self):
"""Test Keras model with 1 GPU and fp16.""" """Test Keras model with 1 GPU and fp16."""
self._setup() self._setup()
......
...@@ -14,10 +14,6 @@ ...@@ -14,10 +14,6 @@
# ============================================================================== # ==============================================================================
"""Runs a ResNet model on the ImageNet dataset.""" """Runs a ResNet model on the ImageNet dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
# Import libraries # Import libraries
...@@ -197,7 +193,6 @@ def run(flags_obj): ...@@ -197,7 +193,6 @@ def run(flags_obj):
optimizer = performance.configure_optimizer( optimizer = performance.configure_optimizer(
optimizer, optimizer,
use_float16=flags_core.get_tf_dtype(flags_obj) == tf.float16, use_float16=flags_core.get_tf_dtype(flags_obj) == tf.float16,
use_graph_rewrite=flags_obj.fp16_implementation == 'graph_rewrite',
loss_scale=flags_core.get_loss_scale(flags_obj, default_for_fp16=128),) loss_scale=flags_core.get_loss_scale(flags_obj, default_for_fp16=128),)
# TODO(hongkuny): Remove trivial model usage and move it to benchmark. # TODO(hongkuny): Remove trivial model usage and move it to benchmark.
...@@ -243,8 +238,7 @@ def run(flags_obj): ...@@ -243,8 +238,7 @@ def run(flags_obj):
if flags_obj.clustering_method == 'selective_clustering': if flags_obj.clustering_method == 'selective_clustering':
import tensorflow_model_optimization as tfmot # pylint: disable=g-import-not-at-top import tensorflow_model_optimization as tfmot # pylint: disable=g-import-not-at-top
if dtype != tf.float32 or \ if dtype != tf.float32:
flags_obj.fp16_implementation == 'graph_rewrite':
raise NotImplementedError( raise NotImplementedError(
'Clustering is currently only supported on dtype=tf.float32.') 'Clustering is currently only supported on dtype=tf.float32.')
model = _cluster_last_three_conv2d_layers(model) model = _cluster_last_three_conv2d_layers(model)
......
...@@ -273,25 +273,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase): ...@@ -273,25 +273,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.loss_scale = 8192 FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like() self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_fp16_graph_rewrite_mlperf_like(self):
  """NCF on one GPU: custom training loop, fp16 via graph rewrite."""
  self._setup()
  # Custom training loop instead of model.fit.
  FLAGS.keras_use_ctl = True
  # Fixed loss scale paired with the deprecated graph-rewrite fp16 path.
  FLAGS.loss_scale = 8192
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.dtype = 'fp16'
  FLAGS.train_epochs = 7
  self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_fp16_graph_rewrite_mlperf_like(self):
  """NCF on one GPU: fp16 via graph rewrite, MLPerf-like settings."""
  self._setup()
  # Fixed loss scale paired with the deprecated graph-rewrite fp16 path.
  FLAGS.loss_scale = 8192
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.dtype = 'fp16'
  FLAGS.train_epochs = 7
  self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self): def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
"""1 GPU using CTL with eager and distribution strategy.""" """1 GPU using CTL with eager and distribution strategy."""
self._setup() self._setup()
...@@ -378,16 +359,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase): ...@@ -378,16 +359,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS.loss_scale = 8192 FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like() self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_tf_data_ctl_fp16_graph_rewrite_mlperf_like(self):
  """NCF on 8 GPUs: tf.data input, custom loop, graph-rewrite fp16."""
  self._setup()
  self._set_8_gpu_defaults()
  # Custom training loop instead of model.fit.
  FLAGS.keras_use_ctl = True
  # Fixed loss scale paired with the deprecated graph-rewrite fp16 path.
  FLAGS.loss_scale = 8192
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.dtype = 'fp16'
  self._run_and_report_benchmark_mlperf_like()
class NCFKerasBenchmarkReal(NCFKerasBenchmarkBase): class NCFKerasBenchmarkReal(NCFKerasBenchmarkBase):
"""NCF Keras throughput benchmarks.""" """NCF Keras throughput benchmarks."""
......
...@@ -163,19 +163,6 @@ class Resnet50CtlAccuracy(CtlBenchmark): ...@@ -163,19 +163,6 @@ class Resnet50CtlAccuracy(CtlBenchmark):
FLAGS.dtype = 'fp16' FLAGS.dtype = 'fp16'
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
  """ResNet50 accuracy run on 8 GPUs with graph-rewrite mixed precision."""
  self._setup()
  FLAGS.num_gpus = 8
  FLAGS.data_dir = self.data_dir
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256 * 8
  # Full convergence run: 90 epochs, evaluating every 10.
  FLAGS.train_epochs = 90
  FLAGS.epochs_between_evals = 10
  FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
  self._run_and_report_benchmark()
@benchmark_wrappers.enable_runtime_flags @benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self): def _run_and_report_benchmark(self):
start_time_sec = time.time() start_time_sec = time.time()
...@@ -251,31 +238,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark): ...@@ -251,31 +238,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
FLAGS.dtype = 'fp16' FLAGS.dtype = 'fp16'
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_amp(self):
  """ResNet50 CTL throughput on one GPU with graph-rewrite mixed precision."""
  self._setup()
  FLAGS.num_gpus = 1
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256
  FLAGS.distribution_strategy = 'one_device'
  FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
  self._run_and_report_benchmark()
def benchmark_xla_1_gpu_amp(self):
  """ResNet50 CTL throughput with XLA on one GPU with graph-rewrite AMP."""
  self._setup()
  FLAGS.enable_xla = True
  FLAGS.num_gpus = 1
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256
  FLAGS.distribution_strategy = 'one_device'
  FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
  self._run_and_report_benchmark()
def benchmark_1_gpu_eager(self): def benchmark_1_gpu_eager(self):
"""Test Keras model with 1 GPU in pure eager mode.""" """Test Keras model with 1 GPU in pure eager mode."""
self._setup() self._setup()
...@@ -371,31 +333,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark): ...@@ -371,31 +333,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
FLAGS.batch_size = 128 FLAGS.batch_size = 128
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
  """ResNet50 CTL throughput on 8 mirrored GPUs with graph-rewrite AMP."""
  self._setup()
  FLAGS.num_gpus = 8
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256 * 8  # 8 GPUs
  FLAGS.distribution_strategy = 'mirrored'
  FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
  self._run_and_report_benchmark()
def benchmark_xla_8_gpu_amp(self):
  """ResNet50 CTL throughput with XLA on 8 GPUs with graph-rewrite AMP."""
  self._setup()
  FLAGS.enable_xla = True
  FLAGS.num_gpus = 8
  FLAGS.dtype = 'fp16'
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.batch_size = 256 * 8  # 8 GPUs
  FLAGS.distribution_strategy = 'mirrored'
  FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp')
  self._run_and_report_benchmark()
def _set_df_common(self): def _set_df_common(self):
FLAGS.steps_per_loop = 500 FLAGS.steps_per_loop = 500
FLAGS.train_epochs = 2 FLAGS.train_epochs = 2
......
...@@ -345,26 +345,6 @@ class TransformerBigKerasAccuracy(TransformerBenchmark): ...@@ -345,26 +345,6 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
bleu_min=28, bleu_min=28,
bleu_max=29.2) bleu_max=29.2)
def benchmark_8_gpu_fp16_amp(self):
  """Benchmark 8 gpu with dynamic batch and fp16 with automatic mixed precision.

  Should converge to 28.4 BLEU (uncased). This has not been verified yet.
  """
  self._setup()
  self._set_data_file_flags()
  FLAGS.num_gpus = 8
  FLAGS.dtype = 'fp16'
  # Deprecated graph-rewrite AMP path (vs. the Keras mixed-precision policy).
  FLAGS.fp16_implementation = 'graph_rewrite'
  FLAGS.param_set = 'big'
  FLAGS.batch_size = 3072*8
  FLAGS.train_steps = 20000 * 12
  # Evaluate BLEU every 20k steps over the full 240k-step run.
  FLAGS.steps_between_evals = 20000
  FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_amp')
  self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
                                 log_steps=FLAGS.log_steps,
                                 bleu_min=28,
                                 bleu_max=29)
def benchmark_8_gpu_static_batch_fp16(self): def benchmark_8_gpu_static_batch_fp16(self):
"""Benchmark 8 gpu with static batch and fp16. """Benchmark 8 gpu with static batch and fp16.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment