Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
9e9534e8
Commit
9e9534e8
authored
Nov 17, 2021
by
Hongkun Yu
Committed by
A. Unique TensorFlower
Nov 17, 2021
Browse files
Deprecate the graph rewrite path for fp16. This is no longer a TF2 api and there is no usage.
PiperOrigin-RevId: 410629444
parent
a8dd50cd
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
1 addition
and
265 deletions
+1
-265
official/benchmark/bert_benchmark.py
official/benchmark/bert_benchmark.py
+0
-38
official/benchmark/bert_squad_benchmark.py
official/benchmark/bert_squad_benchmark.py
+0
-25
official/benchmark/keras_imagenet_benchmark.py
official/benchmark/keras_imagenet_benchmark.py
+0
-83
official/benchmark/models/resnet_imagenet_main.py
official/benchmark/models/resnet_imagenet_main.py
+1
-7
official/benchmark/ncf_keras_benchmark.py
official/benchmark/ncf_keras_benchmark.py
+0
-29
official/benchmark/resnet_ctl_imagenet_benchmark.py
official/benchmark/resnet_ctl_imagenet_benchmark.py
+0
-63
official/benchmark/transformer_benchmark.py
official/benchmark/transformer_benchmark.py
+0
-20
No files found.
official/benchmark/bert_benchmark.py
View file @
9e9534e8
...
@@ -220,44 +220,6 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
...
@@ -220,44 +220,6 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
'summaries/training_summary.txt'
)
'summaries/training_summary.txt'
)
self
.
_run_and_report_benchmark
(
summary_path
)
self
.
_run_and_report_benchmark
(
summary_path
)
def
benchmark_1_gpu_amp_mrpc_no_dist_strat
(
self
):
"""Performance for 1 GPU no DS with automatic mixed precision."""
self
.
_setup
()
self
.
num_gpus
=
1
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_amp_mrpc_no_dist_strat'
)
FLAGS
.
train_data_path
=
self
.
train_data_path
FLAGS
.
eval_data_path
=
self
.
eval_data_path
FLAGS
.
input_meta_data_path
=
self
.
input_meta_data_path
FLAGS
.
bert_config_file
=
self
.
bert_config_file
FLAGS
.
train_batch_size
=
4
FLAGS
.
eval_batch_size
=
4
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
summary_path
=
os
.
path
.
join
(
FLAGS
.
model_dir
,
'summaries/training_summary.txt'
)
self
.
_run_and_report_benchmark
(
summary_path
,
use_ds
=
False
)
def
benchmark_8_gpu_amp_mrpc
(
self
):
"""Test BERT model performance with 8 GPUs with automatic mixed precision."""
self
.
_setup
()
self
.
num_gpus
=
8
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_amp_mrpc'
)
FLAGS
.
train_data_path
=
self
.
train_data_path
FLAGS
.
eval_data_path
=
self
.
eval_data_path
FLAGS
.
input_meta_data_path
=
self
.
input_meta_data_path
FLAGS
.
bert_config_file
=
self
.
bert_config_file
FLAGS
.
train_batch_size
=
32
FLAGS
.
eval_batch_size
=
32
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
summary_path
=
os
.
path
.
join
(
FLAGS
.
model_dir
,
'summaries/training_summary.txt'
)
self
.
_run_and_report_benchmark
(
summary_path
,
use_ds
=
False
)
@
owner_utils
.
Owner
(
'tf-model-garden'
)
@
owner_utils
.
Owner
(
'tf-model-garden'
)
def
benchmark_2x2_tpu_mrpc
(
self
):
def
benchmark_2x2_tpu_mrpc
(
self
):
"""Test BERT model performance with 2x2 TPU."""
"""Test BERT model performance with 2x2 TPU."""
...
...
official/benchmark/bert_squad_benchmark.py
View file @
9e9534e8
...
@@ -319,31 +319,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
...
@@ -319,31 +319,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_amp
(
self
):
"""Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
self
.
_setup
()
self
.
num_gpus
=
1
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_amp_squad'
)
FLAGS
.
train_batch_size
=
4
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
self
.
_run_and_report_benchmark
()
def
benchmark_8_gpu_amp
(
self
):
"""Tests BERT SQuAD model performance with 8 GPUs with automatic mixed precision."""
self
.
_setup
()
self
.
num_gpus
=
8
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_amp_squad'
)
FLAGS
.
train_batch_size
=
32
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
tf_gpu_thread_mode
=
'gpu_private'
self
.
_run_and_report_benchmark
()
@
owner_utils
.
Owner
(
'tf-model-garden'
)
@
owner_utils
.
Owner
(
'tf-model-garden'
)
def
benchmark_2x2_tpu
(
self
):
def
benchmark_2x2_tpu
(
self
):
"""Tests BERT SQuAD model performance with 2x2 TPU."""
"""Tests BERT SQuAD model performance with 2x2 TPU."""
...
...
official/benchmark/keras_imagenet_benchmark.py
View file @
9e9534e8
...
@@ -819,19 +819,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
...
@@ -819,19 +819,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS
.
batch_size
=
128
FLAGS
.
batch_size
=
128
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_amp
(
self
):
"""Test Keras model with 1 GPU with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
enable_eager
=
True
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
distribution_strategy
=
'one_device'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_amp'
)
FLAGS
.
batch_size
=
256
self
.
_run_and_report_benchmark
()
def
benchmark_xla_1_gpu
(
self
):
def
benchmark_xla_1_gpu
(
self
):
"""Test Keras model with XLA and 1 GPU."""
"""Test Keras model with XLA and 1 GPU."""
self
.
_setup
()
self
.
_setup
()
...
@@ -844,20 +831,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
...
@@ -844,20 +831,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS
.
batch_size
=
128
FLAGS
.
batch_size
=
128
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_xla_1_gpu_amp
(
self
):
"""Test Keras model with XLA and 1 GPU with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
enable_eager
=
True
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
enable_xla
=
True
FLAGS
.
distribution_strategy
=
'one_device'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_xla_1_gpu_amp'
)
FLAGS
.
batch_size
=
256
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_fp16
(
self
):
def
benchmark_1_gpu_fp16
(
self
):
"""Test Keras model with 1 GPU and fp16."""
"""Test Keras model with 1 GPU and fp16."""
self
.
_setup
()
self
.
_setup
()
...
@@ -946,19 +919,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
...
@@ -946,19 +919,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS
.
batch_size
=
128
*
8
# 8 GPUs
FLAGS
.
batch_size
=
128
*
8
# 8 GPUs
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_8_gpu_amp
(
self
):
"""Test Keras model with 8 GPUs with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
8
FLAGS
.
enable_eager
=
True
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
distribution_strategy
=
'mirrored'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_amp'
)
FLAGS
.
batch_size
=
256
*
8
# 8 GPUs
self
.
_run_and_report_benchmark
()
def
benchmark_8_gpu_tweaked
(
self
):
def
benchmark_8_gpu_tweaked
(
self
):
"""Test Keras model with manual config tuning and 8 GPUs."""
"""Test Keras model with manual config tuning and 8 GPUs."""
self
.
_setup
()
self
.
_setup
()
...
@@ -983,20 +943,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
...
@@ -983,20 +943,6 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS
.
batch_size
=
128
*
8
# 8 GPUs
FLAGS
.
batch_size
=
128
*
8
# 8 GPUs
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_xla_8_gpu_amp
(
self
):
"""Test Keras model with XLA and 8 GPUs with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
8
FLAGS
.
enable_eager
=
True
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
enable_xla
=
True
FLAGS
.
distribution_strategy
=
'mirrored'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_xla_8_gpu_amp'
)
FLAGS
.
batch_size
=
256
*
8
# 8 GPUs
self
.
_run_and_report_benchmark
()
def
benchmark_xla_8_gpu_tweaked
(
self
):
def
benchmark_xla_8_gpu_tweaked
(
self
):
"""Test Keras model with manual config tuning, 8 GPUs, and XLA."""
"""Test Keras model with manual config tuning, 8 GPUs, and XLA."""
self
.
_setup
()
self
.
_setup
()
...
@@ -1315,20 +1261,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
...
@@ -1315,20 +1261,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
self
.
_override_flags_to_run_test_shorter
()
self
.
_override_flags_to_run_test_shorter
()
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_amp
(
self
):
"""Test Keras model with 1 GPU with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
enable_eager
=
True
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
distribution_strategy
=
'one_device'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_amp'
)
FLAGS
.
batch_size
=
256
self
.
_override_flags_to_run_test_shorter
()
self
.
_run_and_report_benchmark
()
def
benchmark_xla_1_gpu
(
self
):
def
benchmark_xla_1_gpu
(
self
):
"""Test Keras model with XLA and 1 GPU."""
"""Test Keras model with XLA and 1 GPU."""
self
.
_setup
()
self
.
_setup
()
...
@@ -1342,21 +1274,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
...
@@ -1342,21 +1274,6 @@ class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
self
.
_override_flags_to_run_test_shorter
()
self
.
_override_flags_to_run_test_shorter
()
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_xla_1_gpu_amp
(
self
):
"""Test Keras model with XLA and 1 GPU with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
enable_eager
=
True
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
enable_xla
=
True
FLAGS
.
distribution_strategy
=
'one_device'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_xla_1_gpu_amp'
)
FLAGS
.
batch_size
=
256
self
.
_override_flags_to_run_test_shorter
()
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_fp16
(
self
):
def
benchmark_1_gpu_fp16
(
self
):
"""Test Keras model with 1 GPU and fp16."""
"""Test Keras model with 1 GPU and fp16."""
self
.
_setup
()
self
.
_setup
()
...
...
official/benchmark/models/resnet_imagenet_main.py
View file @
9e9534e8
...
@@ -14,10 +14,6 @@
...
@@ -14,10 +14,6 @@
# ==============================================================================
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""
"""Runs a ResNet model on the ImageNet dataset."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
os
# Import libraries
# Import libraries
...
@@ -197,7 +193,6 @@ def run(flags_obj):
...
@@ -197,7 +193,6 @@ def run(flags_obj):
optimizer
=
performance
.
configure_optimizer
(
optimizer
=
performance
.
configure_optimizer
(
optimizer
,
optimizer
,
use_float16
=
flags_core
.
get_tf_dtype
(
flags_obj
)
==
tf
.
float16
,
use_float16
=
flags_core
.
get_tf_dtype
(
flags_obj
)
==
tf
.
float16
,
use_graph_rewrite
=
flags_obj
.
fp16_implementation
==
'graph_rewrite'
,
loss_scale
=
flags_core
.
get_loss_scale
(
flags_obj
,
default_for_fp16
=
128
),)
loss_scale
=
flags_core
.
get_loss_scale
(
flags_obj
,
default_for_fp16
=
128
),)
# TODO(hongkuny): Remove trivial model usage and move it to benchmark.
# TODO(hongkuny): Remove trivial model usage and move it to benchmark.
...
@@ -243,8 +238,7 @@ def run(flags_obj):
...
@@ -243,8 +238,7 @@ def run(flags_obj):
if
flags_obj
.
clustering_method
==
'selective_clustering'
:
if
flags_obj
.
clustering_method
==
'selective_clustering'
:
import
tensorflow_model_optimization
as
tfmot
# pylint: disable=g-import-not-at-top
import
tensorflow_model_optimization
as
tfmot
# pylint: disable=g-import-not-at-top
if
dtype
!=
tf
.
float32
or
\
if
dtype
!=
tf
.
float32
:
flags_obj
.
fp16_implementation
==
'graph_rewrite'
:
raise
NotImplementedError
(
raise
NotImplementedError
(
'Clustering is currently only supported on dtype=tf.float32.'
)
'Clustering is currently only supported on dtype=tf.float32.'
)
model
=
_cluster_last_three_conv2d_layers
(
model
)
model
=
_cluster_last_three_conv2d_layers
(
model
)
...
...
official/benchmark/ncf_keras_benchmark.py
View file @
9e9534e8
...
@@ -273,25 +273,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
...
@@ -273,25 +273,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS
.
loss_scale
=
8192
FLAGS
.
loss_scale
=
8192
self
.
_run_and_report_benchmark_mlperf_like
()
self
.
_run_and_report_benchmark_mlperf_like
()
def
benchmark_1_gpu_ctl_fp16_graph_rewrite_mlperf_like
(
self
):
"""1 GPU using CTL and FP16 graph rewrite."""
self
.
_setup
()
FLAGS
.
keras_use_ctl
=
True
FLAGS
.
train_epochs
=
7
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
loss_scale
=
8192
self
.
_run_and_report_benchmark_mlperf_like
()
def
benchmark_1_gpu_fp16_graph_rewrite_mlperf_like
(
self
):
"""1 GPU using FP16 graph rewrite."""
self
.
_setup
()
FLAGS
.
train_epochs
=
7
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
loss_scale
=
8192
self
.
_run_and_report_benchmark_mlperf_like
()
def
benchmark_1_gpu_ctl_run_eagerly_mlperf_like
(
self
):
def
benchmark_1_gpu_ctl_run_eagerly_mlperf_like
(
self
):
"""1 GPU using CTL with eager and distribution strategy."""
"""1 GPU using CTL with eager and distribution strategy."""
self
.
_setup
()
self
.
_setup
()
...
@@ -378,16 +359,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
...
@@ -378,16 +359,6 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
FLAGS
.
loss_scale
=
8192
FLAGS
.
loss_scale
=
8192
self
.
_run_and_report_benchmark_mlperf_like
()
self
.
_run_and_report_benchmark_mlperf_like
()
def
benchmark_8_gpu_tf_data_ctl_fp16_graph_rewrite_mlperf_like
(
self
):
"""8 GPU FP16 graph rewrite using CTL."""
self
.
_setup
()
self
.
_set_8_gpu_defaults
()
FLAGS
.
keras_use_ctl
=
True
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
loss_scale
=
8192
self
.
_run_and_report_benchmark_mlperf_like
()
class
NCFKerasBenchmarkReal
(
NCFKerasBenchmarkBase
):
class
NCFKerasBenchmarkReal
(
NCFKerasBenchmarkBase
):
"""NCF Keras throughput benchmarks."""
"""NCF Keras throughput benchmarks."""
...
...
official/benchmark/resnet_ctl_imagenet_benchmark.py
View file @
9e9534e8
...
@@ -163,19 +163,6 @@ class Resnet50CtlAccuracy(CtlBenchmark):
...
@@ -163,19 +163,6 @@ class Resnet50CtlAccuracy(CtlBenchmark):
FLAGS
.
dtype
=
'fp16'
FLAGS
.
dtype
=
'fp16'
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_8_gpu_amp
(
self
):
"""Test Keras model with 8 GPUs and mixed precision via graph rewrite."""
self
.
_setup
()
FLAGS
.
num_gpus
=
8
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
256
*
8
FLAGS
.
train_epochs
=
90
FLAGS
.
epochs_between_evals
=
10
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_amp'
)
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
self
.
_run_and_report_benchmark
()
@
benchmark_wrappers
.
enable_runtime_flags
@
benchmark_wrappers
.
enable_runtime_flags
def
_run_and_report_benchmark
(
self
):
def
_run_and_report_benchmark
(
self
):
start_time_sec
=
time
.
time
()
start_time_sec
=
time
.
time
()
...
@@ -251,31 +238,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
...
@@ -251,31 +238,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
FLAGS
.
dtype
=
'fp16'
FLAGS
.
dtype
=
'fp16'
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_amp
(
self
):
"""Test Keras model with 1 GPU with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
distribution_strategy
=
'one_device'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_amp'
)
FLAGS
.
batch_size
=
256
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
self
.
_run_and_report_benchmark
()
def
benchmark_xla_1_gpu_amp
(
self
):
"""Test Keras model with XLA and 1 GPU with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
distribution_strategy
=
'one_device'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_xla_1_gpu_amp'
)
FLAGS
.
batch_size
=
256
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
enable_xla
=
True
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_eager
(
self
):
def
benchmark_1_gpu_eager
(
self
):
"""Test Keras model with 1 GPU in pure eager mode."""
"""Test Keras model with 1 GPU in pure eager mode."""
self
.
_setup
()
self
.
_setup
()
...
@@ -371,31 +333,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
...
@@ -371,31 +333,6 @@ class Resnet50CtlBenchmarkBase(CtlBenchmark):
FLAGS
.
batch_size
=
128
FLAGS
.
batch_size
=
128
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
def
benchmark_8_gpu_amp
(
self
):
"""Test Keras model with 8 GPUs with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
8
FLAGS
.
distribution_strategy
=
'mirrored'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_amp'
)
FLAGS
.
batch_size
=
256
*
8
# 8 GPUs
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
self
.
_run_and_report_benchmark
()
def
benchmark_xla_8_gpu_amp
(
self
):
"""Test Keras model with XLA and 8 GPUs with automatic mixed precision."""
self
.
_setup
()
FLAGS
.
num_gpus
=
8
FLAGS
.
distribution_strategy
=
'mirrored'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_xla_8_gpu_amp'
)
FLAGS
.
batch_size
=
256
*
8
# 8 GPUs
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
enable_xla
=
True
self
.
_run_and_report_benchmark
()
def
_set_df_common
(
self
):
def
_set_df_common
(
self
):
FLAGS
.
steps_per_loop
=
500
FLAGS
.
steps_per_loop
=
500
FLAGS
.
train_epochs
=
2
FLAGS
.
train_epochs
=
2
...
...
official/benchmark/transformer_benchmark.py
View file @
9e9534e8
...
@@ -345,26 +345,6 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
...
@@ -345,26 +345,6 @@ class TransformerBigKerasAccuracy(TransformerBenchmark):
bleu_min
=
28
,
bleu_min
=
28
,
bleu_max
=
29.2
)
bleu_max
=
29.2
)
def
benchmark_8_gpu_fp16_amp
(
self
):
"""Benchmark 8 gpu with dynamic batch and fp16 with automatic mixed precision.
Should converge to 28.4 BLEU (uncased). This has not been verified yet.
"""
self
.
_setup
()
self
.
_set_data_file_flags
()
FLAGS
.
num_gpus
=
8
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
param_set
=
'big'
FLAGS
.
batch_size
=
3072
*
8
FLAGS
.
train_steps
=
20000
*
12
FLAGS
.
steps_between_evals
=
20000
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_fp16_amp'
)
self
.
_run_and_report_benchmark
(
total_batch_size
=
FLAGS
.
batch_size
,
log_steps
=
FLAGS
.
log_steps
,
bleu_min
=
28
,
bleu_max
=
29
)
def
benchmark_8_gpu_static_batch_fp16
(
self
):
def
benchmark_8_gpu_static_batch_fp16
(
self
):
"""Benchmark 8 gpu with static batch and fp16.
"""Benchmark 8 gpu with static batch and fp16.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment