ModelZoo / ResNet50_tensorflow · Commits

Commit 472e2f80
Authored Mar 16, 2024 by zhanggzh

    Merge remote-tracking branch 'tf_model/main'

Parents: d91296eb, f3a14f85
Changes: 215 files

Showing 20 changed files with 6212 additions and 0 deletions (+6212 / -0)
models-2.13.1/official/benchmark/benchmark_wrappers.py                        +96    -0
models-2.13.1/official/benchmark/bert_benchmark.py                            +320   -0
models-2.13.1/official/benchmark/bert_benchmark_utils.py                      +128   -0
models-2.13.1/official/benchmark/bert_pretrain_benchmark.py                   +531   -0
models-2.13.1/official/benchmark/bert_squad_benchmark.py                      +594   -0
models-2.13.1/official/benchmark/config_utils.py                              +27    -0
models-2.13.1/official/benchmark/datastore/schema/benchmark_metric.json       +56    -0
models-2.13.1/official/benchmark/datastore/schema/benchmark_run.json          +368   -0
models-2.13.1/official/benchmark/datastore/schema/benchmark_run_status.json   +14    -0
models-2.13.1/official/benchmark/keras_benchmark.py                           +103   -0
models-2.13.1/official/benchmark/keras_cifar_benchmark.py                     +403   -0
models-2.13.1/official/benchmark/keras_imagenet_benchmark.py                  +1848  -0
models-2.13.1/official/benchmark/models/__init__.py                           +0     -0
models-2.13.1/official/benchmark/models/cifar_preprocessing.py                +158   -0
models-2.13.1/official/benchmark/models/resnet_cifar_main.py                  +285   -0
models-2.13.1/official/benchmark/models/resnet_cifar_model.py                 +334   -0
models-2.13.1/official/benchmark/models/resnet_cifar_test.py                  +194   -0
models-2.13.1/official/benchmark/models/resnet_imagenet_main.py               +344   -0
models-2.13.1/official/benchmark/models/resnet_imagenet_test.py               +290   -0
models-2.13.1/official/benchmark/models/resnet_imagenet_test_tpu.py           +119   -0
models-2.13.1/official/benchmark/benchmark_wrappers.py  (new file, mode 0 → 100644)
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils to annotate and trace benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import flags
from absl import logging
from absl.testing import flagsaver

FLAGS = flags.FLAGS

flags.DEFINE_multi_string(
    'benchmark_method_flags', None,
    'Optional list of runtime flags of the form key=value. Specify '
    'multiple times to specify different flags. These will override the FLAGS '
    'object directly after hardcoded settings in individual benchmark methods '
    'before they call _run_and_report benchmark. Example if we set '
    '--benchmark_method_flags=train_steps=10 and a benchmark method hardcodes '
    'FLAGS.train_steps=10000 and later calls _run_and_report_benchmark, '
    'it\'ll only run for 10 steps. This is useful for '
    'debugging/profiling workflows.')


def enable_runtime_flags(decorated_func):
  """Sets attributes from --benchmark_method_flags for method execution.

  @enable_runtime_flags decorator temporarily adds flags passed in via
  --benchmark_method_flags and runs the decorated function in that context.

  A user can set --benchmark_method_flags=train_steps=5 to run the benchmark
  method in the snippet below with FLAGS.train_steps=5 for debugging (without
  modifying the benchmark code).

  class ModelBenchmark():

    @benchmark_wrappers.enable_runtime_flags
    def _run_and_report_benchmark(self):
      # run benchmark ...
      # report benchmark results ...

    def benchmark_method(self):
      FLAGS.train_steps = 1000
      ...
      self._run_and_report_benchmark()

  Args:
    decorated_func: The method that runs the benchmark after previous setup
      execution that set some flags.

  Returns:
    new_func: The same method which executes in a temporary context where flag
      overrides from --benchmark_method_flags are active.
  """

  def runner(*args, **kwargs):
    """Creates a temporary context to activate --benchmark_method_flags."""
    if FLAGS.benchmark_method_flags:
      saved_flag_values = flagsaver.save_flag_values()
      for key_value in FLAGS.benchmark_method_flags:
        key, value = key_value.split('=', 1)
        try:
          numeric_float = float(value)
          numeric_int = int(numeric_float)
          if abs(numeric_int) == abs(numeric_float):
            flag_value = numeric_int
          else:
            flag_value = numeric_float
        except ValueError:
          flag_value = value
        logging.info('Setting --%s=%s', key, flag_value)
        setattr(FLAGS, key, flag_value)
    else:
      saved_flag_values = None
    try:
      result = decorated_func(*args, **kwargs)
      return result
    finally:
      if saved_flag_values:
        flagsaver.restore_flag_values(saved_flag_values)

  return runner
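For context, a minimal sketch of how the decorator above is meant to be driven. The class, the train_steps flag, and the flag values here are hypothetical illustrations, not part of this commit; they only show the override order (hardcoded value first, then the --benchmark_method_flags value inside the decorated call).

# toy_benchmark.py -- hypothetical usage sketch for enable_runtime_flags.
from absl import flags
from official.benchmark import benchmark_wrappers

FLAGS = flags.FLAGS
flags.DEFINE_integer('train_steps', 1000, 'Hypothetical training-step flag.')


class ToyBenchmark:
  """Hypothetical benchmark class, for illustration only."""

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    # With the override active, FLAGS.train_steps is 10 here even though
    # benchmark_method() hardcoded 10000 just before the call.
    print('running for', FLAGS.train_steps, 'steps')

  def benchmark_method(self):
    FLAGS.train_steps = 10000  # hardcoded setting inside the benchmark
    self._run_and_report_benchmark()


if __name__ == '__main__':
  # Simulates: python toy_benchmark.py --benchmark_method_flags=train_steps=10
  FLAGS(['toy_benchmark', '--benchmark_method_flags=train_steps=10'])
  ToyBenchmark().benchmark_method()

After the decorated call returns, flagsaver restores the saved flag values, so the override is scoped to that single method execution.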
models-2.13.1/official/benchmark/bert_benchmark.py  (new file, mode 0 → 100644)
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes BERT benchmarks and accuracy tests."""
import json
import math
import os
import time

from absl import flags
from absl.testing import flagsaver
import tensorflow as tf

from official.benchmark import benchmark_wrappers
from official.benchmark import bert_benchmark_utils as benchmark_utils
from official.benchmark import owner_utils
from official.common import distribute_utils
from official.legacy.bert import configs
from official.legacy.bert import run_classifier

# pylint: disable=line-too-long
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt'
CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record'
CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record'
CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data'
MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json'
# pylint: enable=line-too-long

TMP_DIR = os.getenv('TMPDIR')
FLAGS = flags.FLAGS


class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
  """Base class to hold methods common to test classes in the module."""

  def __init__(self, output_dir=None, tpu=None):
    super(BertClassifyBenchmarkBase, self).__init__(output_dir, tpu=tpu)
    self.num_epochs = None
    self.num_steps_per_epoch = None
    FLAGS.steps_per_loop = 1

  @flagsaver.flagsaver
  def _run_bert_classifier(self, callbacks=None, use_ds=True):
    """Starts BERT classification task."""
    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
      input_meta_data = json.loads(reader.read().decode('utf-8'))
    bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
    epochs = self.num_epochs if self.num_epochs else FLAGS.num_train_epochs
    if self.num_steps_per_epoch:
      steps_per_epoch = self.num_steps_per_epoch
    else:
      train_data_size = input_meta_data['train_data_size']
      steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
    warmup_steps = int(epochs * steps_per_epoch * 0.1)
    eval_steps = int(
        math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))
    if self.tpu:
      strategy = distribute_utils.get_distribution_strategy(
          distribution_strategy='tpu', tpu_address=self.tpu)
    else:
      strategy = distribute_utils.get_distribution_strategy(
          distribution_strategy='mirrored' if use_ds else 'off',
          num_gpus=self.num_gpus)

    max_seq_length = input_meta_data['max_seq_length']
    train_input_fn = run_classifier.get_dataset_fn(
        FLAGS.train_data_path,
        max_seq_length,
        FLAGS.train_batch_size,
        is_training=True)
    eval_input_fn = run_classifier.get_dataset_fn(
        FLAGS.eval_data_path,
        max_seq_length,
        FLAGS.eval_batch_size,
        is_training=False)
    _, summary = run_classifier.run_bert_classifier(
        strategy,
        bert_config,
        input_meta_data,
        FLAGS.model_dir,
        epochs,
        steps_per_epoch,
        FLAGS.steps_per_loop,
        eval_steps,
        warmup_steps,
        FLAGS.learning_rate,
        FLAGS.init_checkpoint,
        train_input_fn,
        eval_input_fn,
        training_callbacks=False,
        custom_callbacks=callbacks)
    return summary


class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
  """Short benchmark performance tests for BERT model.

  Tests BERT classification performance in different GPU, TPU configurations.
  The naming convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and
  `benchmark_(topology)_tpu_(dataset type)` for TPUs.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    super(BertClassifyBenchmarkReal, self).__init__(
        output_dir=output_dir, tpu=tpu)

    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
    # Since we only care about performance metrics, we limit
    # the number of training steps and epochs to prevent unnecessarily
    # long tests.
    self.num_steps_per_epoch = 100
    self.num_epochs = 1

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0,
                                max_accuracy=1,
                                use_ds=True):
    """Starts BERT performance benchmark test."""
    start_time_sec = time.time()
    summary = self._run_bert_classifier(
        callbacks=[self.timer_callback], use_ds=use_ds)
    wall_time_sec = time.time() - start_time_sec

    # Since we do not load from any pretrained checkpoints, we ignore all
    # accuracy metrics.
    summary.pop('eval_metrics', None)
    summary['start_time_sec'] = start_time_sec

    super(BertClassifyBenchmarkReal, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def benchmark_1_gpu_mrpc(self):
    """Test BERT model performance with 1 GPU."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc')
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    FLAGS.train_batch_size = 4
    FLAGS.eval_batch_size = 4

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_xla(self):
    """Test BERT model performance with 1 GPU."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_xla')
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    FLAGS.train_batch_size = 4
    FLAGS.eval_batch_size = 4
    FLAGS.enable_xla = True

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_no_dist_strat(self):
    """Test BERT model performance with 1 GPU, no distribution strategy."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_no_dist_strat')
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    FLAGS.train_batch_size = 4
    FLAGS.eval_batch_size = 4

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path, use_ds=False)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Test BERT model performance with 8 GPUs."""

    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Test BERT model performance with 2x2 TPU."""

    self._setup()
    FLAGS.steps_per_loop = 50
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    FLAGS.train_batch_size = 32
    FLAGS.eval_batch_size = 32
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path, use_ds=False)


class BertClassifyAccuracy(BertClassifyBenchmarkBase):
  """Short accuracy test for BERT model.

  Tests BERT classification task model accuracy. The naming
  convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` format.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
    self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0.84,
                                max_accuracy=0.88):
    """Starts BERT accuracy benchmark test."""
    start_time_sec = time.time()
    summary = self._run_bert_classifier(callbacks=[self.timer_callback])
    wall_time_sec = time.time() - start_time_sec

    super(BertClassifyAccuracy, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def _setup(self):
    super(BertClassifyAccuracy, self)._setup()
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    FLAGS.init_checkpoint = self.pretrained_checkpoint_path

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Run BERT model accuracy test with 8 GPUs.

    Due to comparatively small cardinality of MRPC dataset, training
    accuracy metric has high variance between trainings. As so, we
    set the wide range of allowed accuracy (84% to 88%).
    """
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_8_gpu_mrpc_xla(self):
    """Run BERT model accuracy test with 8 GPUs with XLA."""
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
    FLAGS.enable_xla = True
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Run BERT model accuracy test on 2x2 TPU."""
    self._setup()
    FLAGS.steps_per_loop = 50
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)


if __name__ == '__main__':
  tf.test.main()
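As a point of reference (not part of this commit), benchmark classes like the ones above are normally not run in full; a single method is selected when the module is executed. A hedged sketch of a typical launch, where the selection flag and its exact semantics depend on the PerfZero / tf.test benchmark runner in use:

# Hypothetical launch sketch -- selecting one benchmark method by regex.
#
#   python3 bert_benchmark.py \
#       --benchmarks=BertClassifyBenchmarkReal.benchmark_1_gpu_mrpc \
#       --benchmark_method_flags=num_train_epochs=1
#
# Treat the --benchmarks flag name as an assumption about the harness, not a
# documented interface of this file.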
models-2.13.1/official/benchmark/bert_benchmark_utils.py  (new file, mode 0 → 100644)
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions or classes shared between BERT benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

# pylint: disable=g-bad-import-order
import numpy as np
from absl import flags
import tensorflow as tf
# pylint: enable=g-bad-import-order

from official.utils.flags import core as flags_core
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark

FLAGS = flags.FLAGS


class BenchmarkTimerCallback(tf.keras.callbacks.Callback):
  """Callback that records time it takes to run each batch."""

  def __init__(self, num_batches_to_skip=10):
    super(BenchmarkTimerCallback, self).__init__()
    self.batch_start_times = {}
    self.batch_stop_times = {}

  def on_batch_begin(self, batch, logs=None):
    self.batch_start_times[batch] = time.time()

  def on_batch_end(self, batch, logs=None):
    # If there are multiple steps_per_loop, the end batch index will not be the
    # same as the starting index. Use the last starting index instead.
    if batch not in self.batch_start_times:
      batch = max(self.batch_start_times.keys())

    self.batch_stop_times[batch] = time.time()

  def get_examples_per_sec(self, batch_size, num_batches_to_skip=1):
    batch_durations = []
    for batch in self.batch_start_times:
      if batch in self.batch_stop_times and batch >= num_batches_to_skip:
        batch_durations.append(self.batch_stop_times[batch] -
                               self.batch_start_times[batch])
    return batch_size / np.mean(batch_durations)

  def get_startup_time(self, program_start_time):
    return self.batch_start_times[0] - program_start_time


class BertBenchmarkBase(PerfZeroBenchmark):
  """Base class to hold methods common to test classes."""
  local_flags = None

  def __init__(self, output_dir=None, tpu=None, **kwargs):
    super(BertBenchmarkBase, self).__init__(
        output_dir=output_dir, tpu=tpu, **kwargs)
    self.num_gpus = 8
    self.timer_callback = None

  def _setup(self):
    """Sets up and resets flags before each test."""
    super(BertBenchmarkBase, self)._setup()
    self.timer_callback = BenchmarkTimerCallback()

  def _report_benchmark(self, stats, wall_time_sec, min_accuracy,
                        max_accuracy):
    """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from BERT models with known entries.
      wall_time_sec: the during of the benchmark execution in seconds
      min_accuracy: Minimum classification accuracy constraint to verify
        correctness of the model.
      max_accuracy: Maximum classification accuracy constraint to verify
        correctness of the model.
    """
    metrics = [{
        'name': 'training_loss',
        'value': stats['train_loss'],
    }]
    if self.timer_callback:
      metrics.append({
          'name':
              'exp_per_second',
          'value':
              self.timer_callback.get_examples_per_sec(
                  FLAGS.train_batch_size * FLAGS.steps_per_loop)
      })
    else:
      metrics.append({
          'name': 'exp_per_second',
          'value': 0.0,
      })
    if self.timer_callback and 'start_time_sec' in stats:
      metrics.append({
          'name': 'startup_time',
          'value': self.timer_callback.get_startup_time(stats['start_time_sec'])
      })
    if 'eval_metrics' in stats:
      metrics.append({
          'name': 'eval_accuracy',
          'value': stats['eval_metrics'],
          'min_value': min_accuracy,
          'max_value': max_accuracy,
      })
    flags_str = flags_core.get_nondefault_flags_as_str()
    self.report_benchmark(
        iters=stats['total_training_steps'],
        wall_time=wall_time_sec,
        metrics=metrics,
        extras={'flags': flags_str})
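A minimal sketch of how BenchmarkTimerCallback can be exercised outside the BERT benchmarks, assuming the official.benchmark package is on PYTHONPATH; the model and data below are toy placeholders, not part of this commit:

# Hypothetical usage sketch: feed the timer callback to a Keras fit() call
# and read back throughput afterwards.
import numpy as np
import tensorflow as tf

from official.benchmark import bert_benchmark_utils

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(8,))])
model.compile(optimizer='sgd', loss='mse')

timer = bert_benchmark_utils.BenchmarkTimerCallback()
x = np.random.rand(256, 8).astype('float32')
y = np.random.rand(256, 1).astype('float32')
model.fit(x, y, batch_size=32, epochs=1, callbacks=[timer], verbose=0)

# Mean throughput over the recorded batches, skipping the first (warm-up)
# batch as in the default num_batches_to_skip=1.
print('examples/sec:', timer.get_examples_per_sec(batch_size=32))

The on_batch_begin/on_batch_end hooks are invoked by Keras for each training batch, which is why the per-batch start and stop times pair up cleanly in this single-step-per-loop setting.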
models-2.13.1/official/benchmark/bert_pretrain_benchmark.py  (new file, mode 0 → 100644)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes benchmark testing for bert pretraining."""
# pylint: disable=line-too-long
import json
import os
import time
from typing import Optional

from absl import flags
from absl import logging
import tensorflow as tf

from official.benchmark import benchmark_wrappers
from official.benchmark import bert_benchmark_utils
from official.benchmark import owner_utils
from official.common import distribute_utils
from official.legacy.bert import run_pretraining
from official.utils.flags import core as flags_core

# Pretrain masked lanauge modeling accuracy range:
MIN_MLM_ACCURACY = 0.635
MAX_MLM_ACCURACY = 0.645

# Pretrain next sentence prediction accuracy range:
MIN_NSP_ACCURACY = 0.94
MAX_NSP_ACCURACY = 0.96

# Pretrain masked lanauge modeling accuracy range:
MIN_MLM_ACCURACY_GPU = 0.378
MAX_MLM_ACCURACY_GPU = 0.388

# Pretrain next sentence prediction accuracy range:
MIN_NSP_ACCURACY_GPU = 0.82
MAX_NSP_ACCURACY_GPU = 0.84

BERT_PRETRAIN_FILES_SEQ128 = 'gs://mlcompass-data/bert/pretraining_data/seq_128/wikipedia.tfrecord*,gs://mlcompass-data/bert/pretraining_data/seq_128/books.tfrecord*'
BERT_BASE_CONFIG_FILE = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12/bert_config.json'

FLAGS = flags.FLAGS


class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
  """Benchmark accuracy tests for BERT Pretraining."""

  def __init__(self,
               output_dir: Optional[str] = None,
               tpu: Optional[str] = None,
               **kwargs):
    """Inits BertPretrainAccuracyBenchmark class.

    Args:
      output_dir: Directory where to output e.g. log files
      tpu: TPU name to use in a TPU benchmark.
      **kwargs: Additional keyword arguments.
    """
    super(BertPretrainAccuracyBenchmark, self).__init__(
        output_dir=output_dir, tpu=tpu, **kwargs)

  def _get_distribution_strategy(self, ds_type='mirrored'):
    """Gets the distribution strategy.

    Args:
      ds_type: String, the distribution strategy type to be used. Can be
        'mirrored', 'multi_worker_mirrored', 'tpu' and 'off'.

    Returns:
      A `tf.distribute.DistibutionStrategy` object.
    """
    if self.tpu or ds_type == 'tpu':
      return distribute_utils.get_distribution_strategy(
          distribution_strategy='tpu', tpu_address=self.tpu)
    elif ds_type == 'multi_worker_mirrored':
      # Configures cluster spec for multi-worker distribution strategy.
      _ = distribute_utils.configure_cluster(FLAGS.worker_hosts,
                                             FLAGS.task_index)
    return distribute_utils.get_distribution_strategy(
        distribution_strategy=ds_type,
        num_gpus=FLAGS.num_gpus,
        all_reduce_alg=FLAGS.all_reduce_alg)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self, summary_path: str, report_accuracy: bool,
                                ds_type: str):
    """Runs and reports the benchmark given the provided configuration."""
    distribution = self._get_distribution_strategy(ds_type=ds_type)
    logging.info('Flags: %s', flags_core.get_nondefault_flags_as_str())
    start_time_sec = time.time()
    run_pretraining.run_bert_pretrain(
        strategy=distribution, custom_callbacks=self.timer_callback)
    wall_time_sec = time.time() - start_time_sec

    # For GPU multi-worker, the summary text file is only generated on chief
    # (metrics aggregated), so only chief has to report the result.
    if tf.io.gfile.exists(summary_path):
      with tf.io.gfile.GFile(summary_path, 'rb') as reader:
        summary = json.loads(reader.read().decode('utf-8'))
      self._report_benchmark(summary, start_time_sec, wall_time_sec,
                             report_accuracy, ds_type)

  def _report_benchmark(self, summary, start_time_sec, wall_time_sec,
                        report_accuracy, ds_type):
    metrics = [{
        'name': 'train_loss',
        'value': summary['train_loss'],
    }, {
        'name':
            'exp_per_second',
        'value':
            self.timer_callback.get_examples_per_sec(
                FLAGS.train_batch_size * FLAGS.steps_per_loop)
    }, {
        'name': 'startup_time',
        'value': self.timer_callback.get_startup_time(start_time_sec)
    }]
    if report_accuracy:
      if ds_type == 'tpu':
        min_mlm_acc = MIN_MLM_ACCURACY
        max_mlm_acc = MAX_MLM_ACCURACY
        min_nsp_acc = MIN_NSP_ACCURACY
        max_nsp_acc = MAX_NSP_ACCURACY
      else:
        min_mlm_acc = MIN_MLM_ACCURACY_GPU
        max_mlm_acc = MAX_MLM_ACCURACY_GPU
        min_nsp_acc = MIN_NSP_ACCURACY_GPU
        max_nsp_acc = MAX_NSP_ACCURACY_GPU
      metrics.extend([{
          'name': 'masked_lm_accuracy',
          'value': summary['masked_lm_accuracy'],
          'min_value': min_mlm_acc,
          'max_value': max_mlm_acc,
      }, {
          'name': 'next_sentence_accuracy',
          'value': summary['next_sentence_accuracy'],
          'min_value': min_nsp_acc,
          'max_value': max_nsp_acc,
      }])
    self.report_benchmark(
        iters=summary['total_training_steps'],
        wall_time=wall_time_sec,
        metrics=metrics,
        extras={'flags': flags_core.get_nondefault_flags_as_str()})

  def _specify_common_flags(self):
    FLAGS.bert_config_file = BERT_BASE_CONFIG_FILE
    FLAGS.learning_rate = 1e-4
    FLAGS.warmup_steps = 10000
    FLAGS.steps_per_loop = 10000
    FLAGS.input_files = BERT_PRETRAIN_FILES_SEQ128
    FLAGS.max_seq_length = 128
    FLAGS.max_predictions_per_seq = 20

  def _specify_tpu_common_flags(self):
    FLAGS.distribution_strategy = 'tpu'
    FLAGS.dtype = 'bf16'

  def _specify_gpu_common_flags(self):
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'

  @owner_utils.Owner('tf-model-garden')
  def benchmark_accuracy_8x8_tpu_bf16_seq128_500k_steps(self):
    """Test bert pretraining with 8x8 TPU for 500k steps."""
    # This is used for accuracy test.
    self._setup()
    self._specify_common_flags()
    self._specify_tpu_common_flags()
    FLAGS.train_batch_size = 512
    FLAGS.num_steps_per_epoch = 500000
    FLAGS.num_train_epochs = 1
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_accuracy_8x8_tpu_bf16_seq128_500k_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Set train_summary_interval to -1 to disable training summary, because
    # writing summary to gcs may fail and summaries are not needed for this
    # accuracy benchmark test.
    FLAGS.train_summary_interval = -1
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=True,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_perf_2x2_tpu_bf16_seq128_10k_steps(self):
    """Test bert pretraining with 2x2 TPU for 10000 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_tpu_common_flags()
    FLAGS.num_steps_per_epoch = 5000
    FLAGS.num_train_epochs = 2
    FLAGS.train_batch_size = 128
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_2x2_tpu_bf16_seq128_10k_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_perf_2x2_tpu_bf16_seq128_10k_steps_mlir(self):
    """Test bert pretraining with 2x2 TPU with MLIR for 10000 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_tpu_common_flags()
    FLAGS.num_steps_per_epoch = 5000
    FLAGS.num_train_epochs = 2
    FLAGS.train_batch_size = 128
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_2x2_tpu_bf16_seq128_10k_steps_mlir')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    tf.config.experimental.enable_mlir_bridge()
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_perf_4x4_tpu_bf16_seq128_10k_steps(self):
    """Test bert pretraining with 4x4 TPU for 10000 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_tpu_common_flags()
    FLAGS.train_batch_size = 512
    FLAGS.num_steps_per_epoch = 5000
    FLAGS.num_train_epochs = 2
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_4x4_tpu_bf16_seq128_10k_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_perf_4x4_tpu_bf16_seq128_10k_steps_mlir(self):
    """Test bert pretraining with 4x4 TPU with MLIR for 10000 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_tpu_common_flags()
    FLAGS.train_batch_size = 512
    FLAGS.num_steps_per_epoch = 5000
    FLAGS.num_train_epochs = 2
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_4x4_tpu_bf16_seq128_10k_steps_mlir')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    tf.config.experimental.enable_mlir_bridge()
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_perf_4x4_tpu_bf16_seq128_1k_steps(self):
    """Test bert pretraining with 4x4 TPU for 1000 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_tpu_common_flags()
    FLAGS.train_batch_size = 512
    FLAGS.warmup_steps = 0
    FLAGS.num_steps_per_epoch = 1000
    FLAGS.num_train_epochs = 1
    FLAGS.steps_per_loop = 500
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_4x4_tpu_bf16_seq128_1k_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_perf_8x8_tpu_bf16_seq128_10k_steps(self):
    """Test bert pretraining with 8x8 TPU for 10000 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_tpu_common_flags()
    FLAGS.train_batch_size = 512
    FLAGS.num_steps_per_epoch = 5000
    FLAGS.num_train_epochs = 2
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_8x8_tpu_bf16_seq128_10k_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_perf_8x16_tpu_bf16_seq128_1k_steps(self):
    """Test bert pretraining with 8x16 TPU for 1000 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_tpu_common_flags()
    FLAGS.train_batch_size = 4096
    FLAGS.warmup_steps = 0
    FLAGS.num_steps_per_epoch = 1000
    FLAGS.num_train_epochs = 1
    FLAGS.steps_per_loop = 500
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_8x16_tpu_bf16_seq128_1k_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-dist-strat')
  def benchmark_accuracy_1x8_gpu_fp16_seq128_15k_steps(self):
    """Test bert pretraining with 8 GPU for 15k steps."""
    # This is used for accuracy test.
    self._setup()
    self._specify_common_flags()
    self._specify_gpu_common_flags()
    FLAGS.num_gpus = 8
    FLAGS.train_batch_size = 96
    FLAGS.num_steps_per_epoch = 5000
    FLAGS.num_train_epochs = 3
    FLAGS.steps_per_loop = 5000
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_accuracy_1x8_gpu_fp16_seq128_15k_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Set train_summary_interval to -1 to disable training summary, because
    # writing summary to gcs may fail and summaries are not needed for this
    # accuracy benchmark test.
    FLAGS.train_summary_interval = -1
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=True,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-dist-strat')
  def benchmark_perf_1x1_gpu_fp16_seq128_200_steps(self):
    """Test bert pretraining with 1 GPU for 200 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_gpu_common_flags()
    FLAGS.num_steps_per_epoch = 200
    FLAGS.num_train_epochs = 1
    FLAGS.num_gpus = 1
    FLAGS.train_batch_size = 12
    FLAGS.steps_per_loop = 100
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_1x1_gpu_fp16_seq128_200_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-dist-strat')
  def benchmark_perf_1x8_gpu_fp16_seq128_200_steps(self):
    """Test bert pretraining with 8 GPU for 200 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_gpu_common_flags()
    FLAGS.num_steps_per_epoch = 200
    FLAGS.num_train_epochs = 1
    FLAGS.num_gpus = 8
    FLAGS.train_batch_size = 96
    FLAGS.steps_per_loop = 100
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_1x8_gpu_fp16_seq128_200_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)


class BertPretrainMultiWorkerBenchmark(BertPretrainAccuracyBenchmark):
  """Bert pretrain distributed benchmark tests with multiple workers."""

  def __init__(self, output_dir=None, tpu=None, **kwargs):
    super(BertPretrainMultiWorkerBenchmark, self).__init__(
        output_dir=output_dir, tpu=tpu, **kwargs)

  def _specify_gpu_mwms_flags(self):
    FLAGS.distribution_strategy = 'multi_worker_mirrored'
    FLAGS.all_reduce_alg = 'nccl'
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'
    FLAGS.num_gpus = 8

  @owner_utils.Owner('tf-dist-strat')
  def benchmark_accuracy_mwms_1x8_gpu_fp16_seq128_15k_steps(self):
    """Test bert pretraining with 8 GPU for 15k steps."""
    # This is used for accuracy test.
    self._setup()
    self._specify_common_flags()
    self._specify_gpu_mwms_flags()
    FLAGS.train_batch_size = 96
    FLAGS.num_steps_per_epoch = 5000
    FLAGS.num_train_epochs = 3
    FLAGS.steps_per_loop = 5000
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_accuracy_mwms_1x8_gpu_fp16_seq128_15k_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Set train_summary_interval to -1 to disable training summary, because
    # writing summary to gcs may fail and summaries are not needed for this
    # accuracy benchmark test.
    FLAGS.train_summary_interval = -1
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=True,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-dist-strat')
  def benchmark_accuracy_mwms_2x8_gpu_fp16_seq128_15k_steps(self):
    """Test bert pretraining with 2x8 GPU for 15k steps."""
    # This is used for accuracy test.
    self._setup()
    self._specify_common_flags()
    self._specify_gpu_mwms_flags()
    # ues the same global batch size as accuracy_mwms_1x8 benchmark.
    FLAGS.train_batch_size = 96
    FLAGS.num_steps_per_epoch = 5000
    FLAGS.num_train_epochs = 3
    FLAGS.steps_per_loop = 5000
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_accuracy_mwms_2x8_gpu_fp16_seq128_15k_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Set train_summary_interval to -1 to disable training summary, because
    # writing summary to gcs may fail and summaries are not needed for this
    # accuracy benchmark test.
    FLAGS.train_summary_interval = -1
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=True,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-dist-strat')
  def benchmark_perf_mwms_1x8_gpu_fp16_seq128_200_steps(self):
    """Test bert pretraining with 1x8 GPU for 200 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_gpu_mwms_flags()
    FLAGS.num_steps_per_epoch = 200
    FLAGS.num_train_epochs = 1
    FLAGS.train_batch_size = 96 * 1
    FLAGS.steps_per_loop = 100
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_mwms_1x8_gpu_fp16_seq128_200_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-dist-strat')
  def benchmark_perf_mwms_2x8_gpu_fp16_seq128_200_steps(self):
    """Test bert pretraining with 2x8 GPU for 200 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_gpu_mwms_flags()
    FLAGS.num_steps_per_epoch = 200
    FLAGS.num_train_epochs = 1
    FLAGS.train_batch_size = 96 * 2
    FLAGS.steps_per_loop = 100
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_mwms_2x8_gpu_fp16_seq128_200_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)

  @owner_utils.Owner('tf-dist-strat')
  def benchmark_perf_mwms_8x8_gpu_fp16_seq128_200_steps(self):
    """Test bert pretraining with 8x8 GPU for 200 steps."""
    self._setup()
    self._specify_common_flags()
    self._specify_gpu_mwms_flags()
    FLAGS.num_steps_per_epoch = 200
    FLAGS.num_train_epochs = 1
    FLAGS.train_batch_size = 96 * 8
    FLAGS.steps_per_loop = 100
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_perf_mwms_8x8_gpu_fp16_seq128_200_steps')
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
    self._run_and_report_benchmark(
        summary_path=summary_path,
        report_accuracy=False,
        ds_type=FLAGS.distribution_strategy)


if __name__ == '__main__':
  tf.test.main()
models-2.13.1/official/benchmark/bert_squad_benchmark.py  (new file, mode 0 → 100644)
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes BERT SQuAD benchmarks and accuracy tests."""
import json
import os
import time

from absl import flags
from absl import logging
from absl.testing import flagsaver
import tensorflow as tf

from official.benchmark import benchmark_wrappers
from official.benchmark import bert_benchmark_utils as benchmark_utils
from official.benchmark import owner_utils
from official.common import distribute_utils
from official.legacy.bert import run_squad
from official.utils.misc import keras_utils

# pylint: disable=line-too-long
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt'
SQUAD_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/squad/squad_train.tf_record'
SQUAD_PREDICT_FILE = 'gs://tf-perfzero-data/bert/squad/dev-v1.1.json'
SQUAD_VOCAB_FILE = 'gs://tf-perfzero-data/bert/squad/vocab.txt'
SQUAD_MEDIUM_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/squad/squad_medium_meta_data'
SQUAD_LONG_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/squad/squad_long_meta_data'
SQUAD_FULL_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/squad/squad_full_meta_data'
MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json'
# pylint: enable=line-too-long

TMP_DIR = os.getenv('TMPDIR')
FLAGS = flags.FLAGS


class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
  """Base class to hold methods common to test classes in the module."""

  def __init__(self, output_dir=None, tpu=None, **kwargs):
    super(BertSquadBenchmarkBase, self).__init__(
        output_dir=output_dir, tpu=tpu, **kwargs)

  def _read_training_summary_from_file(self):
    """Reads the training summary from a file."""
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    with tf.io.gfile.GFile(summary_path, 'rb') as reader:
      return json.loads(reader.read().decode('utf-8'))

  def _read_input_meta_data_from_file(self):
    """Reads the input metadata from a file."""
    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
      return json.loads(reader.read().decode('utf-8'))

  def _get_distribution_strategy(self, ds_type='mirrored'):
    """Gets the distribution strategy.

    Args:
      ds_type: String, the distribution strategy type to be used. Can be
        'mirrored', 'multi_worker_mirrored', 'tpu' and 'off'.

    Returns:
      A `tf.distribute.DistibutionStrategy` object.
    """
    if self.tpu or ds_type == 'tpu':
      return distribute_utils.get_distribution_strategy(
          distribution_strategy='tpu', tpu_address=self.tpu)
    elif ds_type == 'multi_worker_mirrored':
      # Configures cluster spec for multi-worker distribution strategy.
      _ = distribute_utils.configure_cluster(FLAGS.worker_hosts,
                                             FLAGS.task_index)
    return distribute_utils.get_distribution_strategy(
        distribution_strategy=ds_type,
        num_gpus=self.num_gpus,
        all_reduce_alg=FLAGS.all_reduce_alg)

  def _init_gpu_and_data_threads(self):
    """Set env variables before any TF calls."""
    if FLAGS.tf_gpu_thread_mode:
      keras_utils.set_gpu_thread_mode_and_count(
          per_gpu_thread_count=FLAGS.per_gpu_thread_count,
          gpu_thread_mode=FLAGS.tf_gpu_thread_mode,
          num_gpus=self.num_gpus,
          datasets_num_private_threads=FLAGS.datasets_num_private_threads)

  @flagsaver.flagsaver
  def _train_squad(self, run_eagerly=False, ds_type='mirrored'):
    """Runs BERT SQuAD training. Uses mirrored strategy by default."""
    self._init_gpu_and_data_threads()
    input_meta_data = self._read_input_meta_data_from_file()
    strategy = self._get_distribution_strategy(ds_type)

    run_squad.train_squad(
        strategy=strategy,
        input_meta_data=input_meta_data,
        run_eagerly=run_eagerly,
        custom_callbacks=[self.timer_callback])

  @flagsaver.flagsaver
  def _evaluate_squad(self, ds_type='mirrored'):
    """Runs BERT SQuAD evaluation. Uses mirrored strategy by default."""
    self._init_gpu_and_data_threads()
    input_meta_data = self._read_input_meta_data_from_file()
    strategy = self._get_distribution_strategy(ds_type)

    if input_meta_data.get('version_2_with_negative', False):
      logging.error('In memory evaluation result for SQuAD v2 is not accurate')
    eval_metrics = run_squad.eval_squad(
        strategy=strategy, input_meta_data=input_meta_data)
    # Use F1 score as reported evaluation metric.
    self.eval_metrics = eval_metrics['final_f1']


class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
  """Short benchmark performance tests for BERT SQuAD model.

  Tests BERT SQuAD performance in different GPU configurations.
  The naming convention of below test cases follow
  `benchmark_(number of gpus)_gpu` format for GPUs and
  `benchmark_(topology)_tpu` format for TPUs.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    super(BertSquadBenchmarkReal, self).__init__(
        output_dir=output_dir, tpu=tpu, **kwargs)

  def _setup(self):
    """Sets up the benchmark and SQuAD flags."""
    super(BertSquadBenchmarkReal, self)._setup()
    FLAGS.train_data_path = SQUAD_TRAIN_DATA_PATH
    FLAGS.predict_file = SQUAD_PREDICT_FILE
    FLAGS.vocab_file = SQUAD_VOCAB_FILE
    FLAGS.bert_config_file = MODEL_CONFIG_FILE_PATH
    FLAGS.num_train_epochs = 1
    FLAGS.steps_per_loop = 100

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self, run_eagerly=False, ds_type='mirrored'):
    """Runs the benchmark and reports various metrics."""
    if FLAGS.train_batch_size <= 4 or run_eagerly:
      FLAGS.input_meta_data_path = SQUAD_MEDIUM_INPUT_META_DATA_PATH
    else:
      FLAGS.input_meta_data_path = SQUAD_LONG_INPUT_META_DATA_PATH
    start_time_sec = time.time()
    self._train_squad(run_eagerly=run_eagerly, ds_type=ds_type)
    wall_time_sec = time.time() - start_time_sec

    summary = self._read_training_summary_from_file()
    summary['start_time_sec'] = start_time_sec

    super(BertSquadBenchmarkReal, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=0,
        max_accuracy=1)

  def benchmark_1_gpu(self):
    """Tests BERT SQuAD model performance with 1 GPU."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad')
    FLAGS.train_batch_size = 4

    self._run_and_report_benchmark()

  def benchmark_1_gpu_eager(self):
    """Tests BERT SQuAD model performance with 1 GPU."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad_eager')
    FLAGS.train_batch_size = 2

    self._run_and_report_benchmark(run_eagerly=True)

  def benchmark_1_gpu_xla(self):
    """Tests BERT SQuAD model performance with 1 GPU with XLA."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla_squad')
    # XLA runs out of memory when running with batch size 4.
    FLAGS.train_batch_size = 3
    FLAGS.enable_xla = True

    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Tests BERT SQuAD model performance with 1 GPU without DS."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat_squad')
    FLAGS.train_batch_size = 4

    self._run_and_report_benchmark(ds_type='off')

  def benchmark_1_gpu_eager_no_dist_strat(self):
    """Tests BERT SQuAD model performance with 1 GPU with eager execution."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_eager_no_dist_strat_squad')
    FLAGS.train_batch_size = 4

    self._run_and_report_benchmark(ds_type='off', run_eagerly=True)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu(self):
    """Tests BERT SQuAD model performance with 8 GPUs."""

    self._setup()
    self.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
    FLAGS.train_batch_size = 24
    FLAGS.tf_gpu_thread_mode = 'gpu_private'

    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_eager(self):
    """Tests BERT SQuAD model performance with 1 GPU and FP16."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad_fp16_eager')
    FLAGS.train_batch_size = 4
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'

    self._run_and_report_benchmark(run_eagerly=True)

  def benchmark_1_gpu_fp16(self):
    """Tests BERT SQuAD model performance with 1 GPU and FP16."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad_fp16')
    FLAGS.train_batch_size = 4
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'

    self._run_and_report_benchmark()

  def benchmark_1_gpu_xla_fp16(self):
    """Tests BERT SQuAD model performance with 1 GPU with XLA and FP16."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla_squad_fp16')
    FLAGS.train_batch_size = 4
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'

    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Tests BERT SQuAD model performance with 8 GPUs."""

    self._setup()
    self.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad_fp16')
    FLAGS.train_batch_size = 32
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'

    self._run_and_report_benchmark()

  def benchmark_8_gpu_xla_fp16(self):
    """Tests BERT SQuAD model performance with 8 GPUs with XLA."""

    self._setup()
    self.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad_fp16')
    FLAGS.train_batch_size = 32
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'

    self._run_and_report_benchmark()

  def benchmark_8_gpu_xla_tf32(self):
    """Tests BERT SQuAD model performance with 8 GPUs with XLA using TF32."""

    self._setup()
    self.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_xla_tf32')
    FLAGS.train_batch_size = 32
    FLAGS.enable_xla = True
    FLAGS.loss_scale = 'dynamic'

    self._run_and_report_benchmark()

  def benchmark_8_gpu_xla_fp32_no_tf32(self):
    """Tests BERT SQuAD model performance with 8 GPUs with XLA using FP32."""

    self._setup()
    tf.config.experimental.enable_tensor_float_32_execution(False)
    self.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_xla_fp32_no_tf32')
    FLAGS.train_batch_size = 32
    FLAGS.enable_xla = True
    FLAGS.loss_scale = 'dynamic'

    self._run_and_report_benchmark()

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu(self):
    """Tests BERT SQuAD model performance with 2x2 TPU."""

    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu')
    FLAGS.train_batch_size = 48
    FLAGS.predict_batch_size = 48
    FLAGS.mode = 'train'
    FLAGS.learning_rate = 8e-5
    FLAGS.num_train_epochs = 1
    FLAGS.steps_per_loop = 100
    FLAGS.do_lower_case = True
    FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH

    self._run_and_report_benchmark()


class BertSquadAccuracy(BertSquadBenchmarkBase):
  """Short accuracy test for BERT SQuAD model.

  Tests BERT SQuAD accuracy. The naming convention of below test cases follow
  `benchmark_(number of gpus)_gpu` format for GPUs and
  `benchmark_(topology)_tpu` format for TPUs.
  """

  def __init__(self, output_dir=None, tpu=None, **kwargs):
    super(BertSquadAccuracy, self).__init__(
        output_dir=output_dir, tpu=tpu, **kwargs)

  def _setup(self):
    """Sets up the benchmark and SQuAD flags."""
    super(BertSquadAccuracy, self)._setup()
    FLAGS.train_data_path = SQUAD_TRAIN_DATA_PATH
    FLAGS.predict_file = SQUAD_PREDICT_FILE
    FLAGS.vocab_file = SQUAD_VOCAB_FILE
    FLAGS.input_meta_data_path = SQUAD_FULL_INPUT_META_DATA_PATH
    FLAGS.bert_config_file = MODEL_CONFIG_FILE_PATH
    FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH
    FLAGS.num_train_epochs = 2
    FLAGS.steps_per_loop = 100

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self, run_eagerly=False, ds_type='mirrored'):
    """Runs the benchmark and reports various metrics."""
    start_time_sec = time.time()
    self._train_squad(run_eagerly=run_eagerly, ds_type=ds_type)
    self._evaluate_squad(ds_type=ds_type)
    wall_time_sec = time.time() - start_time_sec

    summary = self._read_training_summary_from_file()
    summary['eval_metrics'] = self.eval_metrics
    summary['start_time_sec'] = start_time_sec

    super(BertSquadAccuracy, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=0.900,
        max_accuracy=0.920)

  def benchmark_1_gpu_eager(self):
    """Tests BERT SQuAD model accuracy with 1 GPU with eager execution."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad_eager')
    FLAGS.train_batch_size = 4

    self._run_and_report_benchmark(ds_type='off', run_eagerly=True)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu(self):
    """Tests BERT SQuAD model accuracy with 8 GPUs."""

    self._setup()
    self.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
    FLAGS.train_batch_size = 24
    FLAGS.tf_gpu_thread_mode = 'gpu_private'

    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Tests BERT SQuAD model accuracy with 8 GPUs and FP16."""

    self._setup()
    self.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad_fp16')
    FLAGS.train_batch_size = 32
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'

    self._run_and_report_benchmark()

  def benchmark_8_gpu_xla(self):
    """Tests BERT SQuAD model accuracy with 8 GPUs."""

    self._setup()
    self.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad_xla')
    FLAGS.train_batch_size = 32
    FLAGS.enable_xla = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'

    self._run_and_report_benchmark()

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu(self):
    """Tests BERT SQuAD model accuracy with 2x2 TPU."""

    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu')
    FLAGS.train_batch_size = 48

    self._run_and_report_benchmark()


class BertSquadMultiWorkerAccuracy(BertSquadBenchmarkBase):
  """BERT SQuAD distributed accuracy tests with multiple workers."""

  def __init__(self, output_dir=None, tpu=None, **kwargs):
    super(BertSquadMultiWorkerAccuracy, self).__init__(
        output_dir=output_dir, tpu=tpu, **kwargs)

  def _setup(self):
    """Sets up the benchmark and SQuAD flags."""
    super(BertSquadMultiWorkerAccuracy, self)._setup()
    FLAGS.train_data_path = SQUAD_TRAIN_DATA_PATH
    FLAGS.predict_file = SQUAD_PREDICT_FILE
    FLAGS.vocab_file = SQUAD_VOCAB_FILE
    FLAGS.input_meta_data_path = SQUAD_FULL_INPUT_META_DATA_PATH
    FLAGS.bert_config_file = MODEL_CONFIG_FILE_PATH
    FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH
    FLAGS.num_train_epochs = 2
    FLAGS.steps_per_loop = 100

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self, use_ds=True, run_eagerly=False):
    """Runs the benchmark and reports various metrics."""
    start_time_sec = time.time()
    self._train_squad(run_eagerly=run_eagerly, ds_type='multi_worker_mirrored')
    self._evaluate_squad(ds_type='multi_worker_mirrored')
    wall_time_sec = time.time() - start_time_sec

    summary = self._read_training_summary_from_file()
    summary['eval_metrics'] = self.eval_metrics

    super(BertSquadMultiWorkerAccuracy, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=0.900,
        max_accuracy=0.920)

  def _benchmark_common(self, num_workers, all_reduce_alg):
    """Common to all benchmarks in this class."""
    self._setup()

    num_gpus = 8
    FLAGS.num_gpus = num_gpus
    FLAGS.dtype = 'fp16'
    FLAGS.enable_xla = False
    FLAGS.distribution_strategy = 'multi_worker_mirrored'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 32
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_{}_worker_fp16_{}_tweaked'.format(
            num_workers, all_reduce_alg))
    FLAGS.train_batch_size = 4 * num_gpus * num_workers
    FLAGS.all_reduce_alg = all_reduce_alg

    self._run_and_report_benchmark()

  def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self):
    """8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
    self._benchmark_common(num_workers=2, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self):
    """8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
    self._benchmark_common(num_workers=2, all_reduce_alg='nccl')

  def benchmark_8_gpu_8_workers_fp16_ring_tweaked(self):
    """8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
    self._benchmark_common(num_workers=8, all_reduce_alg='ring')

  def benchmark_8_gpu_8_workers_fp16_nccl_tweaked(self):
    """8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
    self._benchmark_common(num_workers=8, all_reduce_alg='nccl')


class BertSquadMultiWorkerBenchmark(BertSquadBenchmarkBase):
  """BERT SQuAD distributed benchmark tests with multiple workers."""

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    super(BertSquadMultiWorkerBenchmark, self).__init__(
        output_dir=output_dir, tpu=tpu, **kwargs)

  def _setup(self):
    """Sets up the benchmark and SQuAD flags."""
    super(BertSquadMultiWorkerBenchmark, self)._setup()
    FLAGS.train_data_path = SQUAD_TRAIN_DATA_PATH
    FLAGS.predict_file = SQUAD_PREDICT_FILE
    FLAGS.vocab_file = SQUAD_VOCAB_FILE
    FLAGS.input_meta_data_path = SQUAD_FULL_INPUT_META_DATA_PATH
    FLAGS.bert_config_file = MODEL_CONFIG_FILE_PATH
    FLAGS.num_train_epochs = 1
    FLAGS.steps_per_loop = 100

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self, use_ds=True, run_eagerly=False):
    """Runs the benchmark and reports various metrics."""
    if FLAGS.train_batch_size <= 4 * 8:
      FLAGS.input_meta_data_path = SQUAD_LONG_INPUT_META_DATA_PATH
    else:
      FLAGS.input_meta_data_path = SQUAD_FULL_INPUT_META_DATA_PATH
    start_time_sec
=
time
.
time
()
self
.
_train_squad
(
run_eagerly
=
run_eagerly
,
ds_type
=
'multi_worker_mirrored'
)
wall_time_sec
=
time
.
time
()
-
start_time_sec
summary
=
self
.
_read_training_summary_from_file
()
summary
[
'start_time_sec'
]
=
start_time_sec
super
(
BertSquadMultiWorkerBenchmark
,
self
).
_report_benchmark
(
stats
=
summary
,
wall_time_sec
=
wall_time_sec
,
min_accuracy
=
0
,
max_accuracy
=
1
)
def
_benchmark_common
(
self
,
num_workers
,
all_reduce_alg
):
"""Common to all benchmarks in this class."""
self
.
_setup
()
num_gpus
=
8
FLAGS
.
num_gpus
=
num_gpus
FLAGS
.
dtype
=
'fp16'
FLAGS
.
enable_xla
=
False
FLAGS
.
distribution_strategy
=
'multi_worker_mirrored'
FLAGS
.
tf_gpu_thread_mode
=
'gpu_private'
FLAGS
.
datasets_num_private_threads
=
32
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_{}_worker_fp16_{}_tweaked'
.
format
(
num_workers
,
all_reduce_alg
))
FLAGS
.
train_batch_size
=
4
*
num_gpus
*
num_workers
FLAGS
.
all_reduce_alg
=
all_reduce_alg
self
.
_run_and_report_benchmark
()
def
benchmark_8_gpu_1_worker_fp16_ring_tweaked
(
self
):
"""8 GPUs per worker, 1 worker, fp16, ring all-reduce."""
self
.
_benchmark_common
(
num_workers
=
1
,
all_reduce_alg
=
'ring'
)
def
benchmark_8_gpu_1_worker_fp16_nccl_tweaked
(
self
):
"""8 GPUs per worker, 1 worker, fp16, nccl all-reduce."""
self
.
_benchmark_common
(
num_workers
=
1
,
all_reduce_alg
=
'nccl'
)
def
benchmark_8_gpu_2_workers_fp16_ring_tweaked
(
self
):
"""8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
self
.
_benchmark_common
(
num_workers
=
2
,
all_reduce_alg
=
'ring'
)
def
benchmark_8_gpu_2_workers_fp16_nccl_tweaked
(
self
):
"""8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
self
.
_benchmark_common
(
num_workers
=
2
,
all_reduce_alg
=
'nccl'
)
def
benchmark_8_gpu_8_workers_fp16_ring_tweaked
(
self
):
"""8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
self
.
_benchmark_common
(
num_workers
=
8
,
all_reduce_alg
=
'ring'
)
def
benchmark_8_gpu_8_workers_fp16_nccl_tweaked
(
self
):
"""8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
self
.
_benchmark_common
(
num_workers
=
8
,
all_reduce_alg
=
'nccl'
)
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
models-2.13.1/official/benchmark/config_utils.py
0 → 100644
View file @
472e2f80
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmarks config utils."""
import
os
from
typing
import
Optional
def
get_config_path
(
config_file
:
str
,
base_dir
:
Optional
[
str
]
=
None
)
->
str
:
"""Gets the absolute path of the config file."""
resolved_base_dir
=
''
if
base_dir
is
None
else
base_dir
return
os
.
path
.
join
(
resolved_base_dir
,
config_file
)
models-2.13.1/official/benchmark/datastore/schema/benchmark_metric.json
0 → 100644
View file @
472e2f80
[
{
"description"
:
"The ID of the benchmark run, where this metric should tie to."
,
"mode"
:
"REQUIRED"
,
"name"
:
"run_id"
,
"type"
:
"STRING"
},
{
"description"
:
"The name of the metric, which should be descriptive. E.g. training_loss, accuracy."
,
"mode"
:
"REQUIRED"
,
"name"
:
"name"
,
"type"
:
"STRING"
},
{
"description"
:
"The unit of the metric. E.g. MB per sec."
,
"mode"
:
"NULLABLE"
,
"name"
:
"unit"
,
"type"
:
"STRING"
},
{
"description"
:
"The value of the metric."
,
"mode"
:
"NULLABLE"
,
"name"
:
"value"
,
"type"
:
"FLOAT"
},
{
"description"
:
"The timestamp when the metric is recorded."
,
"mode"
:
"REQUIRED"
,
"name"
:
"timestamp"
,
"type"
:
"TIMESTAMP"
},
{
"description"
:
"The global step when this metric is recorded."
,
"mode"
:
"NULLABLE"
,
"name"
:
"global_step"
,
"type"
:
"INTEGER"
},
{
"description"
:
"Free format metadata for the extra information about the metric."
,
"mode"
:
"REPEATED"
,
"name"
:
"extras"
,
"type"
:
"RECORD"
,
"fields"
:
[
{
"mode"
:
"NULLABLE"
,
"name"
:
"name"
,
"type"
:
"STRING"
},
{
"mode"
:
"NULLABLE"
,
"name"
:
"value"
,
"type"
:
"STRING"
}
]
}
]
models-2.13.1/official/benchmark/datastore/schema/benchmark_run.json
0 → 100644
View file @
472e2f80
[
{
"description"
:
"The UUID of the run for the benchmark."
,
"mode"
:
"REQUIRED"
,
"name"
:
"model_id"
,
"type"
:
"STRING"
},
{
"description"
:
"The name of the model, E.g ResNet50, LeNet-5 etc."
,
"mode"
:
"REQUIRED"
,
"name"
:
"model_name"
,
"type"
:
"STRING"
},
{
"description"
:
"The date when the test of the model is started"
,
"mode"
:
"REQUIRED"
,
"name"
:
"run_date"
,
"type"
:
"TIMESTAMP"
},
{
"description"
:
"The unique name for a test by the combination of key parameters, eg batch size, num of GPU, etc. It is hardware independent."
,
"mode"
:
"NULLABLE"
,
"name"
:
"test_id"
,
"type"
:
"STRING"
},
{
"description"
:
"The tensorflow version information."
,
"fields"
:
[
{
"description"
:
"Version of the tensorflow. E.g. 1.7.0-rc0"
,
"mode"
:
"REQUIRED"
,
"name"
:
"version"
,
"type"
:
"STRING"
},
{
"description"
:
"Git Hash of the tensorflow"
,
"mode"
:
"NULLABLE"
,
"name"
:
"git_hash"
,
"type"
:
"STRING"
},
{
"description"
:
"The channel of the tensorflow binary, eg, nightly, RC, final, custom."
,
"mode"
:
"NULLABLE"
,
"name"
:
"channel"
,
"type"
:
"STRING"
},
{
"description"
:
"Identify anything special about the build, eg CUDA 10, NCCL, MKL, etc."
,
"mode"
:
"NULLABLE"
,
"name"
:
"build_type"
,
"type"
:
"STRING"
}
],
"mode"
:
"REQUIRED"
,
"name"
:
"tensorflow_version"
,
"type"
:
"RECORD"
},
{
"description"
:
"The arbitrary attribute of the model."
,
"fields"
:
[
{
"description"
:
"The name of the attribute."
,
"mode"
:
"REQUIRED"
,
"name"
:
"name"
,
"type"
:
"STRING"
},
{
"description"
:
"The value of the attribute."
,
"mode"
:
"NULLABLE"
,
"name"
:
"value"
,
"type"
:
"STRING"
}
],
"mode"
:
"REPEATED"
,
"name"
:
"attribute"
,
"type"
:
"RECORD"
},
{
"description"
:
"Environment variables when the benchmark run is executed."
,
"fields"
:
[
{
"description"
:
"The name of the variable."
,
"mode"
:
"REQUIRED"
,
"name"
:
"name"
,
"type"
:
"STRING"
},
{
"description"
:
"The value of the variable."
,
"mode"
:
"NULLABLE"
,
"name"
:
"value"
,
"type"
:
"STRING"
}
],
"mode"
:
"REPEATED"
,
"name"
:
"environment_variable"
,
"type"
:
"RECORD"
},
{
"description"
:
"TF Environment variables when the benchmark run is executed."
,
"fields"
:
[
{
"description"
:
"The name of the variable."
,
"mode"
:
"REQUIRED"
,
"name"
:
"name"
,
"type"
:
"STRING"
},
{
"description"
:
"The value of the variable."
,
"mode"
:
"NULLABLE"
,
"name"
:
"value"
,
"type"
:
"STRING"
}
],
"mode"
:
"REPEATED"
,
"name"
:
"tensorflow_environment_variables"
,
"type"
:
"RECORD"
},
{
"description"
:
"The list of parameters run with the model. It could contain hyperparameters or others."
,
"fields"
:
[
{
"description"
:
"The name of the parameter."
,
"mode"
:
"REQUIRED"
,
"name"
:
"name"
,
"type"
:
"STRING"
},
{
"description"
:
"The string value of the parameter."
,
"mode"
:
"NULLABLE"
,
"name"
:
"string_value"
,
"type"
:
"STRING"
},
{
"description"
:
"The bool value of the parameter."
,
"mode"
:
"NULLABLE"
,
"name"
:
"bool_value"
,
"type"
:
"STRING"
},
{
"description"
:
"The int/long value of the parameter."
,
"mode"
:
"NULLABLE"
,
"name"
:
"long_value"
,
"type"
:
"INTEGER"
},
{
"description"
:
"The double/float value of parameter."
,
"mode"
:
"NULLABLE"
,
"name"
:
"float_value"
,
"type"
:
"FLOAT"
}
],
"mode"
:
"REPEATED"
,
"name"
:
"run_parameters"
,
"type"
:
"RECORD"
},
{
"description"
:
"The dataset that run with the benchmark."
,
"mode"
:
"NULLABLE"
,
"name"
:
"dataset"
,
"type"
:
"RECORD"
,
"fields"
:
[
{
"description"
:
"The name of the dataset that the model is trained/validated with. E.g ImageNet, mnist."
,
"mode"
:
"REQUIRED"
,
"name"
:
"name"
,
"type"
:
"STRING"
},
{
"description"
:
"The arbitrary attribute of the dataset."
,
"fields"
:
[
{
"description"
:
"The name of the attribute."
,
"mode"
:
"REQUIRED"
,
"name"
:
"name"
,
"type"
:
"STRING"
},
{
"description"
:
"The value of the attribute."
,
"mode"
:
"NULLABLE"
,
"name"
:
"value"
,
"type"
:
"STRING"
}
],
"mode"
:
"REPEATED"
,
"name"
:
"attribute"
,
"type"
:
"RECORD"
}
]
},
{
"description"
:
"Used to differentiate from AWS, GCE or DGX-1 at a high level"
,
"mode"
:
"NULLABLE"
,
"name"
:
"test_environment"
,
"type"
:
"STRING"
},
{
"description"
:
"The machine configuration of the benchmark run."
,
"mode"
:
"NULLABLE"
,
"name"
:
"machine_config"
,
"type"
:
"RECORD"
,
"fields"
:
[
{
"description"
:
"The platform information of the benchmark run."
,
"mode"
:
"NULLABLE"
,
"name"
:
"platform_info"
,
"type"
:
"RECORD"
,
"fields"
:
[
{
"description"
:
"Eg: 64bit."
,
"mode"
:
"NULLABLE"
,
"name"
:
"bits"
,
"type"
:
"STRING"
},
{
"description"
:
"Eg: ELF."
,
"mode"
:
"NULLABLE"
,
"name"
:
"linkage"
,
"type"
:
"STRING"
},
{
"description"
:
"Eg: i386."
,
"mode"
:
"NULLABLE"
,
"name"
:
"machine"
,
"type"
:
"STRING"
},
{
"description"
:
"Eg: 3.13.0-76-generic."
,
"mode"
:
"NULLABLE"
,
"name"
:
"release"
,
"type"
:
"STRING"
},
{
"description"
:
"Eg: Linux."
,
"mode"
:
"NULLABLE"
,
"name"
:
"system"
,
"type"
:
"STRING"
},
{
"description"
:
"Eg: #120-Ubuntu SMP Mon Jan 18 15:59:10 UTC 2016."
,
"mode"
:
"NULLABLE"
,
"name"
:
"version"
,
"type"
:
"STRING"
}
]
},
{
"description"
:
"The CPU information of the benchmark run."
,
"mode"
:
"NULLABLE"
,
"name"
:
"cpu_info"
,
"type"
:
"RECORD"
,
"fields"
:
[
{
"mode"
:
"NULLABLE"
,
"name"
:
"num_cores"
,
"type"
:
"INTEGER"
},
{
"mode"
:
"NULLABLE"
,
"name"
:
"num_cores_allowed"
,
"type"
:
"INTEGER"
},
{
"description"
:
"How fast are those CPUs."
,
"mode"
:
"NULLABLE"
,
"name"
:
"mhz_per_cpu"
,
"type"
:
"FLOAT"
},
{
"description"
:
"Additional CPU info, Eg: Intel Ivybridge with HyperThreading (24 cores)."
,
"mode"
:
"NULLABLE"
,
"name"
:
"cpu_info"
,
"type"
:
"STRING"
},
{
"description"
:
"What kind of cpu scaling is enabled on the host. Eg performance, ondemand, conservative, mixed."
,
"mode"
:
"NULLABLE"
,
"name"
:
"cpu_governor"
,
"type"
:
"STRING"
},
{
"description"
:
"Cache size of the CPUs."
,
"mode"
:
"NULLABLE"
,
"name"
:
"cache_size"
,
"type"
:
"RECORD"
,
"fields"
:
[
{
"mode"
:
"NULLABLE"
,
"name"
:
"level"
,
"type"
:
"STRING"
},
{
"mode"
:
"NULLABLE"
,
"name"
:
"size"
,
"type"
:
"INTEGER"
}
]
}
]
},
{
"mode"
:
"NULLABLE"
,
"name"
:
"gpu_info"
,
"type"
:
"RECORD"
,
"fields"
:
[
{
"mode"
:
"NULLABLE"
,
"name"
:
"count"
,
"type"
:
"INTEGER"
},
{
"mode"
:
"NULLABLE"
,
"name"
:
"model"
,
"type"
:
"STRING"
},
{
"mode"
:
"NULLABLE"
,
"name"
:
"cuda_version"
,
"type"
:
"STRING"
}
]
},
{
"description"
:
"The cloud instance inforation if the benchmark run is executed on cloud"
,
"mode"
:
"NULLABLE"
,
"name"
:
"cloud_info"
,
"type"
:
"RECORD"
,
"fields"
:
[
{
"description"
:
"The instance type, E.g. n1-standard-4."
,
"mode"
:
"NULLABLE"
,
"name"
:
"instance_type"
,
"type"
:
"STRING"
},
{
"description"
:
"The arbitrary attribute of the cloud info."
,
"fields"
:
[
{
"description"
:
"The name of the attribute."
,
"mode"
:
"REQUIRED"
,
"name"
:
"name"
,
"type"
:
"STRING"
},
{
"description"
:
"The value of the attribute."
,
"mode"
:
"NULLABLE"
,
"name"
:
"value"
,
"type"
:
"STRING"
}
],
"mode"
:
"REPEATED"
,
"name"
:
"attribute"
,
"type"
:
"RECORD"
}
]
},
{
"mode"
:
"NULLABLE"
,
"name"
:
"memory_total"
,
"type"
:
"INTEGER"
},
{
"mode"
:
"NULLABLE"
,
"name"
:
"memory_available"
,
"type"
:
"STRING"
}
]
}
]
models-2.13.1/official/benchmark/datastore/schema/benchmark_run_status.json
0 → 100644
View file @
472e2f80
[
{
"description"
:
"The UUID of the run for the benchmark."
,
"mode"
:
"REQUIRED"
,
"name"
:
"run_id"
,
"type"
:
"STRING"
},
{
"description"
:
"The status of the run for the benchmark. Eg, running, failed, success"
,
"mode"
:
"REQUIRED"
,
"name"
:
"status"
,
"type"
:
"STRING"
}
]
\ No newline at end of file
models-2.13.1/official/benchmark/keras_benchmark.py
0 → 100644
View file @
472e2f80
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow
as
tf
from
official.benchmark.perfzero_benchmark
import
PerfZeroBenchmark
from
official.utils.flags
import
core
as
flags_core
class
KerasBenchmark
(
PerfZeroBenchmark
):
"""Base benchmark class with methods to simplify testing."""
def
__init__
(
self
,
output_dir
=
None
,
default_flags
=
None
,
flag_methods
=
None
,
tpu
=
None
):
super
(
KerasBenchmark
,
self
).
__init__
(
output_dir
=
output_dir
,
default_flags
=
default_flags
,
flag_methods
=
flag_methods
,
tpu
=
tpu
)
def
_report_benchmark
(
self
,
stats
,
wall_time_sec
,
top_1_max
=
None
,
top_1_min
=
None
,
log_steps
=
None
,
total_batch_size
=
None
,
warmup
=
1
,
start_time_sec
=
None
):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from keras models with known entries.
wall_time_sec: the during of the benchmark execution in seconds
top_1_max: highest passing level for top_1 accuracy.
top_1_min: lowest passing level for top_1 accuracy.
log_steps: How often the log was created for stats['step_timestamp_log'].
total_batch_size: Global batch-size.
warmup: number of entries in stats['step_timestamp_log'] to ignore.
start_time_sec: the start time of the program in seconds since epoch
"""
metrics
=
[]
if
'accuracy_top_1'
in
stats
:
metrics
.
append
({
'name'
:
'accuracy_top_1'
,
'value'
:
stats
[
'accuracy_top_1'
],
'min_value'
:
top_1_min
,
'max_value'
:
top_1_max
})
metrics
.
append
({
'name'
:
'top_1_train_accuracy'
,
'value'
:
stats
[
'training_accuracy_top_1'
]
})
if
(
warmup
and
'step_timestamp_log'
in
stats
and
len
(
stats
[
'step_timestamp_log'
])
>
warmup
):
# first entry in the time_log is start of step 1. The rest of the
# entries are the end of each step recorded
time_log
=
stats
[
'step_timestamp_log'
]
elapsed
=
time_log
[
-
1
].
timestamp
-
time_log
[
warmup
].
timestamp
num_examples
=
(
total_batch_size
*
log_steps
*
(
len
(
time_log
)
-
warmup
-
1
))
examples_per_sec
=
num_examples
/
elapsed
metrics
.
append
({
'name'
:
'exp_per_second'
,
'value'
:
examples_per_sec
})
if
'avg_exp_per_second'
in
stats
:
metrics
.
append
({
'name'
:
'avg_exp_per_second'
,
'value'
:
stats
[
'avg_exp_per_second'
]
})
if
start_time_sec
and
'step_timestamp_log'
in
stats
:
time_log
=
stats
[
'step_timestamp_log'
]
# time_log[0] is recorded at the beginning of the first step.
startup_time
=
time_log
[
0
].
timestamp
-
start_time_sec
metrics
.
append
({
'name'
:
'startup_time'
,
'value'
:
startup_time
})
flags_str
=
flags_core
.
get_nondefault_flags_as_str
()
self
.
report_benchmark
(
iters
=-
1
,
wall_time
=
wall_time_sec
,
metrics
=
metrics
,
extras
=
{
'flags'
:
flags_str
})
models-2.13.1/official/benchmark/keras_cifar_benchmark.py
0 → 100644
View file @
472e2f80
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
time
from
absl
import
flags
import
tensorflow
as
tf
from
official.benchmark
import
keras_benchmark
from
official.benchmark
import
benchmark_wrappers
from
official.benchmark.models
import
resnet_cifar_main
MIN_TOP_1_ACCURACY
=
0.929
MAX_TOP_1_ACCURACY
=
0.938
FLAGS
=
flags
.
FLAGS
CIFAR_DATA_DIR_NAME
=
'cifar-10-batches-bin'
class
Resnet56KerasAccuracy
(
keras_benchmark
.
KerasBenchmark
):
"""Accuracy tests for ResNet56 Keras CIFAR-10."""
def
__init__
(
self
,
output_dir
=
None
,
root_data_dir
=
None
,
**
kwargs
):
"""A benchmark class.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more named
arguments before updating the constructor.
"""
self
.
data_dir
=
os
.
path
.
join
(
root_data_dir
,
CIFAR_DATA_DIR_NAME
)
flag_methods
=
[
resnet_cifar_main
.
define_cifar_flags
]
super
(
Resnet56KerasAccuracy
,
self
).
__init__
(
output_dir
=
output_dir
,
flag_methods
=
flag_methods
)
def
_setup
(
self
):
super
(
Resnet56KerasAccuracy
,
self
).
_setup
()
FLAGS
.
use_tensor_lr
=
False
def
benchmark_graph_1_gpu
(
self
):
"""Test keras based model with Keras fit and distribution strategies."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_graph_1_gpu'
)
FLAGS
.
dtype
=
'fp32'
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu
(
self
):
"""Test keras based model with eager and distribution strategies."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu'
)
FLAGS
.
dtype
=
'fp32'
FLAGS
.
enable_eager
=
True
self
.
_run_and_report_benchmark
()
def
benchmark_cpu
(
self
):
"""Test keras based model on CPU."""
self
.
_setup
()
FLAGS
.
num_gpus
=
0
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_cpu'
)
FLAGS
.
dtype
=
'fp32'
FLAGS
.
enable_eager
=
True
FLAGS
.
data_format
=
'channels_last'
self
.
_run_and_report_benchmark
()
def
benchmark_cpu_no_dist_strat
(
self
):
"""Test keras based model on CPU without distribution strategies."""
self
.
_setup
()
FLAGS
.
num_gpus
=
0
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_cpu_no_dist_strat'
)
FLAGS
.
dtype
=
'fp32'
FLAGS
.
enable_eager
=
True
FLAGS
.
distribution_strategy
=
'off'
FLAGS
.
data_format
=
'channels_last'
self
.
_run_and_report_benchmark
()
def
benchmark_cpu_no_dist_strat_run_eagerly
(
self
):
"""Test keras based model on CPU w/forced eager and no dist_strat."""
self
.
_setup
()
FLAGS
.
num_gpus
=
0
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_cpu_no_dist_strat_run_eagerly'
)
FLAGS
.
dtype
=
'fp32'
FLAGS
.
enable_eager
=
True
FLAGS
.
run_eagerly
=
True
FLAGS
.
distribution_strategy
=
'off'
FLAGS
.
data_format
=
'channels_last'
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_no_dist_strat
(
self
):
"""Test keras based model with eager and no dist strat."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_no_dist_strat'
)
FLAGS
.
dtype
=
'fp32'
FLAGS
.
enable_eager
=
True
FLAGS
.
distribution_strategy
=
'off'
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_no_dist_strat_run_eagerly
(
self
):
"""Test keras based model w/forced eager and no dist_strat."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_no_dist_strat_run_eagerly'
)
FLAGS
.
dtype
=
'fp32'
FLAGS
.
enable_eager
=
True
FLAGS
.
run_eagerly
=
True
FLAGS
.
distribution_strategy
=
'off'
self
.
_run_and_report_benchmark
()
def
benchmark_graph_1_gpu_no_dist_strat
(
self
):
"""Test keras based model with Keras fit but not distribution strategies."""
self
.
_setup
()
FLAGS
.
distribution_strategy
=
'off'
FLAGS
.
num_gpus
=
1
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_graph_1_gpu_no_dist_strat'
)
FLAGS
.
dtype
=
'fp32'
self
.
_run_and_report_benchmark
()
def
benchmark_2_gpu
(
self
):
"""Test keras based model with eager and distribution strategies."""
self
.
_setup
()
FLAGS
.
num_gpus
=
2
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_2_gpu'
)
FLAGS
.
dtype
=
'fp32'
FLAGS
.
enable_eager
=
True
self
.
_run_and_report_benchmark
()
def
benchmark_graph_2_gpu
(
self
):
"""Test keras based model with Keras fit and distribution strategies."""
self
.
_setup
()
FLAGS
.
num_gpus
=
2
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
FLAGS
.
train_epochs
=
182
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_graph_2_gpu'
)
FLAGS
.
dtype
=
'fp32'
self
.
_run_and_report_benchmark
()
@
benchmark_wrappers
.
enable_runtime_flags
def
_run_and_report_benchmark
(
self
):
start_time_sec
=
time
.
time
()
stats
=
resnet_cifar_main
.
run
(
FLAGS
)
wall_time_sec
=
time
.
time
()
-
start_time_sec
super
(
Resnet56KerasAccuracy
,
self
).
_report_benchmark
(
stats
,
wall_time_sec
,
top_1_min
=
MIN_TOP_1_ACCURACY
,
top_1_max
=
MAX_TOP_1_ACCURACY
,
total_batch_size
=
FLAGS
.
batch_size
,
log_steps
=
100
)
class
Resnet56KerasBenchmarkBase
(
keras_benchmark
.
KerasBenchmark
):
"""Short performance tests for ResNet56 via Keras and CIFAR-10."""
def
__init__
(
self
,
output_dir
=
None
,
default_flags
=
None
):
flag_methods
=
[
resnet_cifar_main
.
define_cifar_flags
]
super
(
Resnet56KerasBenchmarkBase
,
self
).
__init__
(
output_dir
=
output_dir
,
flag_methods
=
flag_methods
,
default_flags
=
default_flags
)
@
benchmark_wrappers
.
enable_runtime_flags
def
_run_and_report_benchmark
(
self
):
start_time_sec
=
time
.
time
()
stats
=
resnet_cifar_main
.
run
(
FLAGS
)
wall_time_sec
=
time
.
time
()
-
start_time_sec
super
(
Resnet56KerasBenchmarkBase
,
self
).
_report_benchmark
(
stats
,
wall_time_sec
,
total_batch_size
=
FLAGS
.
batch_size
,
log_steps
=
FLAGS
.
log_steps
)
def
benchmark_1_gpu
(
self
):
"""Test 1 gpu."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
enable_eager
=
True
FLAGS
.
distribution_strategy
=
'one_device'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu'
)
FLAGS
.
batch_size
=
128
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_xla
(
self
):
"""Test 1 gpu with xla enabled."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
enable_eager
=
True
FLAGS
.
run_eagerly
=
False
FLAGS
.
enable_xla
=
True
FLAGS
.
distribution_strategy
=
'one_device'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_xla'
)
FLAGS
.
batch_size
=
128
self
.
_run_and_report_benchmark
()
def
benchmark_graph_1_gpu
(
self
):
"""Test 1 gpu graph."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
enable_eager
=
False
FLAGS
.
run_eagerly
=
False
FLAGS
.
distribution_strategy
=
'one_device'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_graph_1_gpu'
)
FLAGS
.
batch_size
=
128
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_no_dist_strat
(
self
):
"""Test 1 gpu without distribution strategies."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
enable_eager
=
True
FLAGS
.
distribution_strategy
=
'off'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_no_dist_strat'
)
FLAGS
.
batch_size
=
128
self
.
_run_and_report_benchmark
()
def
benchmark_graph_1_gpu_no_dist_strat
(
self
):
"""Test 1 gpu graph mode without distribution strategies."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
enable_eager
=
False
FLAGS
.
distribution_strategy
=
'off'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_graph_1_gpu_no_dist_strat'
)
FLAGS
.
batch_size
=
128
self
.
_run_and_report_benchmark
()
def
benchmark_1_gpu_no_dist_strat_run_eagerly
(
self
):
"""Test 1 gpu without distribution strategy and forced eager."""
self
.
_setup
()
FLAGS
.
num_gpus
=
1
FLAGS
.
batch_size
=
128
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_1_gpu_no_dist_strat_run_eagerly'
)
FLAGS
.
dtype
=
'fp32'
FLAGS
.
enable_eager
=
True
FLAGS
.
run_eagerly
=
True
FLAGS
.
distribution_strategy
=
'off'
self
.
_run_and_report_benchmark
()
def
benchmark_2_gpu
(
self
):
"""Test 2 gpu."""
self
.
_setup
()
FLAGS
.
num_gpus
=
2
FLAGS
.
enable_eager
=
True
FLAGS
.
run_eagerly
=
False
FLAGS
.
distribution_strategy
=
'mirrored'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_2_gpu'
)
FLAGS
.
batch_size
=
128
*
2
# 2 GPUs
self
.
_run_and_report_benchmark
()
def
benchmark_graph_2_gpu
(
self
):
"""Test 2 gpu graph mode."""
self
.
_setup
()
FLAGS
.
num_gpus
=
2
FLAGS
.
enable_eager
=
False
FLAGS
.
run_eagerly
=
False
FLAGS
.
distribution_strategy
=
'mirrored'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_graph_2_gpu'
)
FLAGS
.
batch_size
=
128
*
2
# 2 GPUs
self
.
_run_and_report_benchmark
()
def
benchmark_cpu
(
self
):
"""Test cpu."""
self
.
_setup
()
FLAGS
.
num_gpus
=
0
FLAGS
.
enable_eager
=
True
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_cpu'
)
FLAGS
.
batch_size
=
128
FLAGS
.
data_format
=
'channels_last'
self
.
_run_and_report_benchmark
()
def
benchmark_graph_cpu
(
self
):
"""Test cpu graph mode."""
self
.
_setup
()
FLAGS
.
num_gpus
=
0
FLAGS
.
enable_eager
=
False
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_graph_cpu'
)
FLAGS
.
batch_size
=
128
FLAGS
.
data_format
=
'channels_last'
self
.
_run_and_report_benchmark
()
def
benchmark_cpu_no_dist_strat_run_eagerly
(
self
):
"""Test cpu without distribution strategy and forced eager."""
self
.
_setup
()
FLAGS
.
num_gpus
=
0
FLAGS
.
distribution_strategy
=
'off'
FLAGS
.
enable_eager
=
True
FLAGS
.
run_eagerly
=
True
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_cpu_no_dist_strat_run_eagerly'
)
FLAGS
.
batch_size
=
128
FLAGS
.
data_format
=
'channels_last'
self
.
_run_and_report_benchmark
()
def
benchmark_cpu_no_dist_strat
(
self
):
"""Test cpu without distribution strategies."""
self
.
_setup
()
FLAGS
.
num_gpus
=
0
FLAGS
.
enable_eager
=
True
FLAGS
.
distribution_strategy
=
'off'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_cpu_no_dist_strat'
)
FLAGS
.
batch_size
=
128
FLAGS
.
data_format
=
'channels_last'
self
.
_run_and_report_benchmark
()
def
benchmark_graph_cpu_no_dist_strat
(
self
):
"""Test cpu graph mode without distribution strategies."""
self
.
_setup
()
FLAGS
.
num_gpus
=
0
FLAGS
.
enable_eager
=
False
FLAGS
.
distribution_strategy
=
'off'
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_graph_cpu_no_dist_strat'
)
FLAGS
.
batch_size
=
128
FLAGS
.
data_format
=
'channels_last'
self
.
_run_and_report_benchmark
()
class
Resnet56KerasBenchmarkSynth
(
Resnet56KerasBenchmarkBase
):
"""Synthetic benchmarks for ResNet56 and Keras."""
def
__init__
(
self
,
output_dir
=
None
,
root_data_dir
=
None
,
**
kwargs
):
default_flags
=
{}
default_flags
[
'skip_eval'
]
=
True
default_flags
[
'use_synthetic_data'
]
=
True
default_flags
[
'train_steps'
]
=
110
default_flags
[
'log_steps'
]
=
10
default_flags
[
'use_tensor_lr'
]
=
False
super
(
Resnet56KerasBenchmarkSynth
,
self
).
__init__
(
output_dir
=
output_dir
,
default_flags
=
default_flags
)
class
Resnet56KerasBenchmarkReal
(
Resnet56KerasBenchmarkBase
):
"""Real data benchmarks for ResNet56 and Keras."""
def
__init__
(
self
,
output_dir
=
None
,
root_data_dir
=
None
,
**
kwargs
):
default_flags
=
{}
default_flags
[
'skip_eval'
]
=
True
default_flags
[
'data_dir'
]
=
os
.
path
.
join
(
root_data_dir
,
CIFAR_DATA_DIR_NAME
)
default_flags
[
'train_steps'
]
=
110
default_flags
[
'log_steps'
]
=
10
default_flags
[
'use_tensor_lr'
]
=
False
super
(
Resnet56KerasBenchmarkReal
,
self
).
__init__
(
output_dir
=
output_dir
,
default_flags
=
default_flags
)
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
models-2.13.1/official/benchmark/keras_imagenet_benchmark.py
0 → 100644
View file @
472e2f80
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
# pylint: disable=line-too-long
from
__future__
import
print_function
import
json
import
os
import
time
from
typing
import
Any
,
MutableMapping
,
Optional
from
absl
import
flags
import
tensorflow
as
tf
# pylint: disable=g-bad-import-order
from
official.benchmark
import
benchmark_wrappers
from
official.benchmark
import
keras_benchmark
from
official.benchmark.models
import
resnet_imagenet_main
from
official.legacy.image_classification
import
classifier_trainer
MIN_TOP_1_ACCURACY
=
0.76
MAX_TOP_1_ACCURACY
=
0.77
MOBILENET_V1_MIN_TOP_1_ACCURACY
=
0.65
MOBILENET_V1_MAX_TOP_1_ACCURACY
=
0.68
# Range of top-1 accracies for model optimization techniques.
# Each item indicates (MIN_TOP_1_ACCURACY, MAX_TOP_1_ACCURACY).
MODEL_OPTIMIZATION_TOP_1_ACCURACY
=
{
'RESNET50_FINETUNE_PRUNING'
:
(
0.76
,
0.77
),
'MOBILENET_V1_FINETUNE_PRUNING'
:
(
0.67
,
0.68
),
'MOBILENET_V1_FINETUNE_CLUSTERING'
:
(
0.68
,
0.70
)
}
FLAGS
=
flags
.
FLAGS
def
_get_classifier_parameters
(
model_variant
:
Optional
[
str
]
=
None
,
num_gpus
:
int
=
0
,
builder
:
str
=
'records'
,
skip_eval
:
bool
=
False
,
distribution_strategy
:
str
=
'mirrored'
,
per_replica_batch_size
:
int
=
128
,
epochs
:
int
=
90
,
steps
:
int
=
0
,
epochs_between_evals
:
int
=
1
,
dtype
:
str
=
'float32'
,
enable_xla
:
bool
=
False
,
run_eagerly
:
bool
=
False
,
gpu_thread_mode
:
Optional
[
str
]
=
None
,
dataset_num_private_threads
:
Optional
[
int
]
=
None
,
loss_scale
:
Optional
[
str
]
=
None
,
report_metrics
:
bool
=
True
,
batchnorm_spatial_persistent
:
bool
=
False
)
->
MutableMapping
[
str
,
Any
]:
"""Gets classifier trainer's ResNet parameters."""
params
=
{
'runtime'
:
{
'num_gpus'
:
num_gpus
,
'distribution_strategy'
:
distribution_strategy
,
'run_eagerly'
:
run_eagerly
,
'enable_xla'
:
enable_xla
,
'dataset_num_private_threads'
:
dataset_num_private_threads
,
'gpu_thread_mode'
:
gpu_thread_mode
,
'loss_scale'
:
loss_scale
,
'batchnorm_spatial_persistent'
:
batchnorm_spatial_persistent
,
},
'train_dataset'
:
{
'builder'
:
builder
,
'use_per_replica_batch_size'
:
True
,
'batch_size'
:
per_replica_batch_size
,
'image_size'
:
224
,
'dtype'
:
dtype
,
},
'validation_dataset'
:
{
'builder'
:
builder
,
'batch_size'
:
per_replica_batch_size
,
'use_per_replica_batch_size'
:
True
,
'image_size'
:
224
,
'dtype'
:
dtype
,
},
'train'
:
{
'epochs'
:
epochs
,
'steps'
:
steps
,
'callbacks'
:
{
'enable_tensorboard'
:
False
,
'enable_checkpoint_and_export'
:
False
,
'enable_time_history'
:
True
,
},
'metrics'
:
[
'accuracy'
]
if
report_metrics
else
[],
},
'model'
:
{
'loss'
:
{
'label_smoothing'
:
0.1
,
},
},
'evaluation'
:
{
'epochs_between_evals'
:
epochs_between_evals
,
'skip_eval'
:
skip_eval
,
},
}
if
model_variant
is
not
None
:
params
[
'model'
][
'model_params'
]
=
{
'model_name'
:
model_variant
,
}
return
params
class
Resnet50KerasAccuracy
(
keras_benchmark
.
KerasBenchmark
):
"""Benchmark accuracy tests for ResNet50 in Keras."""
def
__init__
(
self
,
output_dir
:
Optional
[
str
]
=
None
,
root_data_dir
:
Optional
[
str
]
=
None
,
**
kwargs
):
"""A benchmark class.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
flag_methods
=
[
classifier_trainer
.
define_classifier_flags
]
self
.
data_dir
=
os
.
path
.
join
(
root_data_dir
,
'imagenet'
)
super
(
Resnet50KerasAccuracy
,
self
).
__init__
(
output_dir
=
output_dir
,
flag_methods
=
flag_methods
)
@
benchmark_wrappers
.
enable_runtime_flags
def
_run_and_report_benchmark
(
self
,
experiment_name
:
str
,
top_1_min
:
float
=
MIN_TOP_1_ACCURACY
,
top_1_max
:
float
=
MAX_TOP_1_ACCURACY
,
num_gpus
:
int
=
0
,
distribution_strategy
:
str
=
'mirrored'
,
per_replica_batch_size
:
int
=
128
,
epochs
:
int
=
90
,
steps
:
int
=
0
,
epochs_between_evals
:
int
=
1
,
dtype
:
str
=
'float32'
,
enable_xla
:
bool
=
False
,
run_eagerly
:
bool
=
False
,
gpu_thread_mode
:
Optional
[
str
]
=
None
,
dataset_num_private_threads
:
Optional
[
int
]
=
None
,
loss_scale
:
Optional
[
str
]
=
None
):
"""Runs and reports the benchmark given the provided configuration."""
FLAGS
.
model_type
=
'resnet'
FLAGS
.
dataset
=
'imagenet'
FLAGS
.
mode
=
'train_and_eval'
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
experiment_name
)
parameters
=
_get_classifier_parameters
(
num_gpus
=
num_gpus
,
distribution_strategy
=
distribution_strategy
,
per_replica_batch_size
=
per_replica_batch_size
,
epochs
=
epochs
,
steps
=
steps
,
epochs_between_evals
=
epochs_between_evals
,
dtype
=
dtype
,
enable_xla
=
enable_xla
,
run_eagerly
=
run_eagerly
,
gpu_thread_mode
=
gpu_thread_mode
,
dataset_num_private_threads
=
dataset_num_private_threads
,
report_metrics
=
True
,
loss_scale
=
loss_scale
,
batchnorm_spatial_persistent
=
True
)
FLAGS
.
params_override
=
json
.
dumps
(
parameters
)
total_batch_size
=
num_gpus
*
per_replica_batch_size
start_time_sec
=
time
.
time
()
stats
=
classifier_trainer
.
run
(
flags
.
FLAGS
)
wall_time_sec
=
time
.
time
()
-
start_time_sec
super
(
Resnet50KerasAccuracy
,
self
).
_report_benchmark
(
stats
,
wall_time_sec
,
top_1_min
=
top_1_min
,
top_1_max
=
top_1_max
,
total_batch_size
=
total_batch_size
,
log_steps
=
100
)
def
benchmark_8_gpu
(
self
):
"""Tests Keras model with eager, dist_strat and 8 GPUs."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_8_gpu'
,
num_gpus
=
8
,
per_replica_batch_size
=
128
,
epochs
=
90
,
epochs_between_evals
=
10
,
dtype
=
'float32'
)
def
benchmark_8_gpu_fp16
(
self
):
"""Tests Keras model with eager, dist_strat, 8 GPUs, and fp16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_8_gpu_fp16'
,
num_gpus
=
8
,
per_replica_batch_size
=
256
,
epochs
=
90
,
epochs_between_evals
=
10
,
dtype
=
'float16'
)
def
benchmark_xla_8_gpu_fp16
(
self
):
"""Tests Keras model with XLA, eager, dist_strat, 8 GPUs and fp16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_8_gpu_fp16'
,
num_gpus
=
8
,
per_replica_batch_size
=
256
,
epochs
=
90
,
epochs_between_evals
=
10
,
dtype
=
'float16'
,
enable_xla
=
True
)
def
benchmark_xla_8_gpu_fp16_dynamic
(
self
):
"""Tests Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_8_gpu_fp16_dynamic'
,
top_1_min
=
0.736
,
num_gpus
=
8
,
per_replica_batch_size
=
256
,
epochs
=
90
,
epochs_between_evals
=
10
,
dtype
=
'float16'
,
loss_scale
=
'dynamic'
)
def
_get_model_dir
(
self
,
folder_name
):
return
os
.
path
.
join
(
self
.
output_dir
,
folder_name
)
class
MobilenetV1KerasAccuracy
(
keras_benchmark
.
KerasBenchmark
):
"""Benchmark accuracy tests for MobilenetV1 in Keras."""
def
__init__
(
self
,
output_dir
=
None
,
root_data_dir
=
None
,
**
kwargs
):
"""A benchmark class.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
flag_methods
=
[
resnet_imagenet_main
.
define_imagenet_keras_flags
]
self
.
data_dir
=
os
.
path
.
join
(
root_data_dir
,
'imagenet'
)
super
(
MobilenetV1KerasAccuracy
,
self
).
__init__
(
output_dir
=
output_dir
,
flag_methods
=
flag_methods
,
default_flags
=
{
'model'
:
'mobilenet'
,
'optimizer'
:
'mobilenet_default'
,
'initial_learning_rate_per_sample'
:
0.00039
,
})
def
benchmark_8_gpu
(
self
):
"""Test Keras model with eager, dist_strat and 8 GPUs."""
self
.
_setup
()
FLAGS
.
num_gpus
=
8
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
batch_size
=
128
*
8
FLAGS
.
train_epochs
=
90
FLAGS
.
epochs_between_evals
=
10
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu'
)
FLAGS
.
dtype
=
'fp32'
FLAGS
.
enable_eager
=
True
self
.
_run_and_report_benchmark
()
@
benchmark_wrappers
.
enable_runtime_flags
def
_run_and_report_benchmark
(
self
,
top_1_min
=
MOBILENET_V1_MIN_TOP_1_ACCURACY
,
top_1_max
=
MOBILENET_V1_MAX_TOP_1_ACCURACY
):
start_time_sec
=
time
.
time
()
stats
=
resnet_imagenet_main
.
run
(
flags
.
FLAGS
)
wall_time_sec
=
time
.
time
()
-
start_time_sec
super
(
MobilenetV1KerasAccuracy
,
self
).
_report_benchmark
(
stats
,
wall_time_sec
,
top_1_min
=
top_1_min
,
top_1_max
=
top_1_max
,
total_batch_size
=
FLAGS
.
batch_size
,
log_steps
=
100
)
def
_get_model_dir
(
self
,
folder_name
):
return
os
.
path
.
join
(
self
.
output_dir
,
folder_name
)
class
KerasClassifierBenchmarkBase
(
keras_benchmark
.
KerasBenchmark
):
"""Classifier Trainer benchmarks."""
def
__init__
(
self
,
model
,
output_dir
=
None
,
default_flags
=
None
,
tpu
=
None
,
dataset_builder
=
'records'
,
train_epochs
=
1
,
train_steps
=
110
,
data_dir
=
None
):
flag_methods
=
[
classifier_trainer
.
define_classifier_flags
]
self
.
model
=
model
self
.
dataset_builder
=
dataset_builder
self
.
train_epochs
=
train_epochs
self
.
train_steps
=
train_steps
self
.
data_dir
=
data_dir
super
(
KerasClassifierBenchmarkBase
,
self
).
__init__
(
output_dir
=
output_dir
,
flag_methods
=
flag_methods
,
default_flags
=
default_flags
,
tpu
=
tpu
)
@
benchmark_wrappers
.
enable_runtime_flags
def
_run_and_report_benchmark
(
self
,
experiment_name
:
str
,
model_variant
:
Optional
[
str
]
=
None
,
skip_steps
:
Optional
[
int
]
=
None
,
top_1_min
:
float
=
MIN_TOP_1_ACCURACY
,
top_1_max
:
float
=
MAX_TOP_1_ACCURACY
,
num_gpus
:
int
=
0
,
num_tpus
:
int
=
0
,
distribution_strategy
:
str
=
'mirrored'
,
per_replica_batch_size
:
int
=
128
,
epochs_between_evals
:
int
=
1
,
dtype
:
str
=
'float32'
,
enable_xla
:
bool
=
False
,
run_eagerly
:
bool
=
False
,
gpu_thread_mode
:
Optional
[
str
]
=
None
,
dataset_num_private_threads
:
Optional
[
int
]
=
None
,
loss_scale
:
Optional
[
str
]
=
None
):
"""Runs and reports the benchmark given the provided configuration."""
FLAGS
.
model_type
=
self
.
model
FLAGS
.
dataset
=
'imagenet'
FLAGS
.
mode
=
'train_and_eval'
FLAGS
.
data_dir
=
self
.
data_dir
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
experiment_name
)
parameters
=
_get_classifier_parameters
(
model_variant
=
model_variant
,
builder
=
self
.
dataset_builder
,
skip_eval
=
True
,
num_gpus
=
num_gpus
,
distribution_strategy
=
distribution_strategy
,
per_replica_batch_size
=
per_replica_batch_size
,
epochs
=
self
.
train_epochs
,
steps
=
self
.
train_steps
,
epochs_between_evals
=
epochs_between_evals
,
dtype
=
dtype
,
enable_xla
=
enable_xla
,
gpu_thread_mode
=
gpu_thread_mode
,
dataset_num_private_threads
=
dataset_num_private_threads
,
loss_scale
=
loss_scale
,
report_metrics
=
False
,
batchnorm_spatial_persistent
=
True
)
FLAGS
.
params_override
=
json
.
dumps
(
parameters
)
if
distribution_strategy
==
'tpu'
:
total_batch_size
=
num_tpus
*
per_replica_batch_size
else
:
total_batch_size
=
num_gpus
*
per_replica_batch_size
start_time_sec
=
time
.
time
()
stats
=
classifier_trainer
.
run
(
flags
.
FLAGS
)
wall_time_sec
=
time
.
time
()
-
start_time_sec
# Number of logged step time entries that are excluded in performance
# report. We keep results from last 100 batches, or skip the steps based on
# input skip_steps.
warmup
=
(
skip_steps
or
(
self
.
train_steps
-
100
))
//
FLAGS
.
log_steps
super
(
KerasClassifierBenchmarkBase
,
self
).
_report_benchmark
(
stats
,
wall_time_sec
,
total_batch_size
=
total_batch_size
,
log_steps
=
FLAGS
.
log_steps
,
warmup
=
warmup
,
start_time_sec
=
start_time_sec
)
def
benchmark_1_gpu_no_dist_strat
(
self
):
"""Tests Keras model with 1 GPU, no distribution strategy."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_1_gpu_no_dist_strat'
,
num_gpus
=
1
,
distribution_strategy
=
'off'
,
per_replica_batch_size
=
128
)
def
benchmark_1_gpu_no_dist_strat_run_eagerly
(
self
):
"""Tests Keras model with 1 GPU, no distribution strategy, run eagerly."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_1_gpu_no_dist_strat_run_eagerly'
,
num_gpus
=
1
,
run_eagerly
=
True
,
distribution_strategy
=
'off'
,
per_replica_batch_size
=
64
)
def
benchmark_1_gpu_no_dist_strat_run_eagerly_fp16
(
self
):
"""Tests with 1 GPU, no distribution strategy, fp16, run eagerly."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16'
,
num_gpus
=
1
,
run_eagerly
=
True
,
distribution_strategy
=
'off'
,
dtype
=
'float16'
,
per_replica_batch_size
=
128
)
def
benchmark_1_gpu
(
self
):
"""Tests Keras model with 1 GPU."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_1_gpu'
,
num_gpus
=
1
,
distribution_strategy
=
'one_device'
,
per_replica_batch_size
=
128
)
def
benchmark_xla_1_gpu
(
self
):
"""Tests Keras model with XLA and 1 GPU."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_1_gpu'
,
num_gpus
=
1
,
enable_xla
=
True
,
distribution_strategy
=
'one_device'
,
per_replica_batch_size
=
128
)
def
benchmark_1_gpu_fp16
(
self
):
"""Tests Keras model with 1 GPU and fp16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_1_gpu_fp16'
,
num_gpus
=
1
,
distribution_strategy
=
'one_device'
,
dtype
=
'float16'
,
per_replica_batch_size
=
256
)
def
benchmark_1_gpu_fp16_dynamic
(
self
):
"""Tests Keras model with 1 GPU, fp16, and dynamic loss scaling."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_1_gpu_fp16_dynamic'
,
num_gpus
=
1
,
distribution_strategy
=
'one_device'
,
dtype
=
'float16'
,
per_replica_batch_size
=
256
,
loss_scale
=
'dynamic'
)
def
benchmark_xla_1_gpu_fp16
(
self
):
"""Tests Keras model with XLA, 1 GPU and fp16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_1_gpu_fp16'
,
num_gpus
=
1
,
enable_xla
=
True
,
distribution_strategy
=
'one_device'
,
dtype
=
'float16'
,
per_replica_batch_size
=
256
)
def
benchmark_xla_1_gpu_fp16_tweaked
(
self
):
"""Tests Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_1_gpu_fp16_tweaked'
,
num_gpus
=
1
,
enable_xla
=
True
,
distribution_strategy
=
'one_device'
,
dtype
=
'float16'
,
per_replica_batch_size
=
256
,
gpu_thread_mode
=
'gpu_private'
)
def
benchmark_xla_1_gpu_fp16_dynamic
(
self
):
"""Tests Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_1_gpu_fp16_dynamic'
,
num_gpus
=
1
,
enable_xla
=
True
,
distribution_strategy
=
'one_device'
,
dtype
=
'float16'
,
per_replica_batch_size
=
256
,
loss_scale
=
'dynamic'
)
def
benchmark_8_gpu
(
self
):
"""Tests Keras model with 8 GPUs."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_8_gpu'
,
num_gpus
=
8
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
128
)
def
benchmark_8_gpu_tweaked
(
self
):
"""Tests Keras model with manual config tuning and 8 GPUs."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_8_gpu_tweaked'
,
num_gpus
=
8
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
128
,
dataset_num_private_threads
=
14
)
def
benchmark_xla_8_gpu
(
self
):
"""Tests Keras model with XLA and 8 GPUs."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_8_gpu'
,
num_gpus
=
8
,
enable_xla
=
True
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
128
)
def
benchmark_xla_8_gpu_tweaked
(
self
):
"""Tests Keras model with manual config tuning, 8 GPUs, and XLA."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_8_gpu_tweaked'
,
num_gpus
=
8
,
enable_xla
=
True
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
128
,
gpu_thread_mode
=
'gpu_private'
,
dataset_num_private_threads
=
24
)
def
benchmark_8_gpu_fp16
(
self
):
"""Tests Keras model with 8 GPUs and fp16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_8_gpu_fp16'
,
num_gpus
=
8
,
dtype
=
'float16'
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
256
)
def
benchmark_8_gpu_fp16_tweaked
(
self
):
"""Tests Keras model with 8 GPUs, fp16, and manual config tuning."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_8_gpu_fp16_tweaked'
,
num_gpus
=
8
,
dtype
=
'float16'
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
256
,
gpu_thread_mode
=
'gpu_private'
,
dataset_num_private_threads
=
40
)
def
benchmark_8_gpu_fp16_dynamic_tweaked
(
self
):
"""Tests Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_8_gpu_fp16_dynamic_tweaked'
,
num_gpus
=
8
,
dtype
=
'float16'
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
256
,
loss_scale
=
'dynamic'
,
gpu_thread_mode
=
'gpu_private'
,
dataset_num_private_threads
=
40
)
def
benchmark_xla_8_gpu_fp16
(
self
):
"""Tests Keras model with XLA, 8 GPUs and fp16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_8_gpu_fp16'
,
dtype
=
'float16'
,
num_gpus
=
8
,
enable_xla
=
True
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
256
)
def
benchmark_xla_8_gpu_fp16_tweaked
(
self
):
"""Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_8_gpu_fp16_tweaked'
,
dtype
=
'float16'
,
num_gpus
=
8
,
enable_xla
=
True
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
256
,
gpu_thread_mode
=
'gpu_private'
,
dataset_num_private_threads
=
48
)
def
benchmark_xla_8_gpu_fp16_tweaked_delay_measure
(
self
):
"""Tests with manual config tuning, XLA, 8 GPUs and fp16.
Delay performance measurement for stable performance on 96 vCPU platforms.
"""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_8_gpu_fp16_tweaked_delay_measure'
,
dtype
=
'float16'
,
num_gpus
=
8
,
enable_xla
=
True
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
256
,
gpu_thread_mode
=
'gpu_private'
,
dataset_num_private_threads
=
48
)
def
benchmark_xla_8_gpu_fp16_dynamic_tweaked
(
self
):
"""Tests Keras model with config tuning, XLA, 8 GPUs and dynamic fp16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_xla_8_gpu_fp16_dynamic_tweaked'
,
dtype
=
'float16'
,
num_gpus
=
8
,
enable_xla
=
True
,
distribution_strategy
=
'mirrored'
,
per_replica_batch_size
=
256
,
gpu_thread_mode
=
'gpu_private'
,
loss_scale
=
'dynamic'
,
dataset_num_private_threads
=
48
)
def
benchmark_2x2_tpu_bf16
(
self
):
"""Test Keras model with 2x2 TPU, bf16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_2x2_tpu_bf16'
,
dtype
=
'bfloat16'
,
num_tpus
=
8
,
distribution_strategy
=
'tpu'
,
per_replica_batch_size
=
128
)
def
benchmark_2x2_tpu
(
self
):
"""Test Keras model with 2x2 TPU."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_2x2_tpu'
,
num_tpus
=
8
,
distribution_strategy
=
'tpu'
,
per_replica_batch_size
=
128
)
def
benchmark_4x4_tpu_bf16
(
self
):
"""Test Keras model with 4x4 TPU, bf16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_4x4_tpu_bf16'
,
dtype
=
'bfloat16'
,
num_tpus
=
32
,
distribution_strategy
=
'tpu'
,
per_replica_batch_size
=
128
)
def
benchmark_4x4_tpu
(
self
):
"""Test Keras model with 4x4 TPU."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_4x4_tpu'
,
num_tpus
=
32
,
distribution_strategy
=
'tpu'
,
per_replica_batch_size
=
128
)
def
benchmark_2x2_tpu_bf16_mlir
(
self
):
"""Test Keras model with 2x2 TPU, bf16."""
self
.
_setup
()
tf
.
config
.
experimental
.
enable_mlir_bridge
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_2x2_tpu_bf16_mlir'
,
dtype
=
'bfloat16'
,
num_tpus
=
8
,
distribution_strategy
=
'tpu'
,
per_replica_batch_size
=
128
)
def
benchmark_4x4_tpu_bf16_mlir
(
self
):
"""Test Keras model with 4x4 TPU, bf16."""
self
.
_setup
()
tf
.
config
.
experimental
.
enable_mlir_bridge
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_4x4_tpu_bf16_mlir'
,
dtype
=
'bfloat16'
,
num_tpus
=
32
,
distribution_strategy
=
'tpu'
,
per_replica_batch_size
=
128
)
def
benchmark_8x8_tpu_bf16
(
self
):
"""Test Keras model with 8x8 TPU, bf16."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_8x8_tpu_bf16'
,
dtype
=
'bfloat16'
,
num_tpus
=
128
,
distribution_strategy
=
'tpu'
,
per_replica_batch_size
=
64
)
def
benchmark_8x8_tpu
(
self
):
"""Test Keras model with 8x8 TPU."""
self
.
_setup
()
self
.
_run_and_report_benchmark
(
experiment_name
=
'benchmark_8x8_tpu'
,
num_tpus
=
128
,
distribution_strategy
=
'tpu'
,
per_replica_batch_size
=
64
)
def
fill_report_object
(
self
,
stats
):
super
(
KerasClassifierBenchmarkBase
,
self
).
fill_report_object
(
stats
,
total_batch_size
=
FLAGS
.
batch_size
,
log_steps
=
FLAGS
.
log_steps
)
class
Resnet50KerasBenchmarkBase
(
keras_benchmark
.
KerasBenchmark
):
"""Resnet50 benchmarks."""
def
__init__
(
self
,
output_dir
=
None
,
default_flags
=
None
,
tpu
=
None
):
flag_methods
=
[
resnet_imagenet_main
.
define_imagenet_keras_flags
]
super
(
Resnet50KerasBenchmarkBase
,
self
).
__init__
(
output_dir
=
output_dir
,
flag_methods
=
flag_methods
,
default_flags
=
default_flags
,
tpu
=
tpu
)
@
benchmark_wrappers
.
enable_runtime_flags
def
_run_and_report_benchmark
(
self
,
skip_steps
=
None
):
start_time_sec
=
time
.
time
()
stats
=
resnet_imagenet_main
.
run
(
FLAGS
)
wall_time_sec
=
time
.
time
()
-
start_time_sec
# Number of logged step time entries that are excluded in performance
# report. We keep results from last 100 batches, or skip the steps based on
# input skip_steps.
warmup
=
(
skip_steps
or
(
FLAGS
.
train_steps
-
100
))
//
FLAGS
.
log_steps
super
(
Resnet50KerasBenchmarkBase
,
self
).
_report_benchmark
(
stats
,
wall_time_sec
,
total_batch_size
=
FLAGS
.
batch_size
,
log_steps
=
FLAGS
.
log_steps
,
warmup
=
warmup
,
start_time_sec
=
start_time_sec
)
  def benchmark_1_gpu_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked(self):
    """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.explicit_gpu_placement = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked')
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
    """Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked(self):
    """Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.explicit_gpu_placement = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()
  def benchmark_1_gpu(self):
    """Test Keras model with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_dynamic(self):
    """Test Keras model with 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16(self):
    """Test Keras model with XLA, 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()
  def benchmark_8_gpu(self):
    """Test Keras model with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp32_no_tf32(self):
    """Test Keras model with 8 GPUs. Runs in FP32 by disabling TF32 execution."""
    self._setup()
    tf.config.experimental.enable_tensor_float_32_execution(False)

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp32_no_tf32')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.datasets_num_private_threads = 14
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu(self):
    """Test Keras model with XLA and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning, 8 GPUs, and XLA."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_tweaked')
    FLAGS.batch_size = 128 * 8
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 24
    self._run_and_report_benchmark()
  def benchmark_8_gpu_fp16(self):
    """Test Keras model with 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_tweaked(self):
    """Test Keras model with 8 GPUs, fp16, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 40
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_dynamic_tweaked(self):
    """Test Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 40
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
    """Test Keras model with XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 48
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self):
    """Test with manual config tuning, XLA, 8 GPUs and fp16.

    Delay performance measurement for stable performance on 96 vCPU platforms.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_tweaked_delay_measure')
    FLAGS.batch_size = 256 * 8
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 48
    FLAGS.train_steps = 310
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Test Keras model with config tuning, XLA, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 48
    self._run_and_report_benchmark()
  def benchmark_2x2_tpu_bf16(self):
    """Test Keras model with 2x2 TPU, bf16."""
    self._setup()

    FLAGS.dtype = 'bf16'
    FLAGS.distribution_strategy = 'tpu'
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_bf16')
    FLAGS.batch_size = 1024
    self._run_and_report_benchmark()

  def benchmark_4x4_tpu_bf16(self):
    """Test Keras model with 4x4 TPU, bf16."""
    self._setup()

    FLAGS.dtype = 'bf16'
    FLAGS.distribution_strategy = 'tpu'
    FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu_bf16')
    FLAGS.batch_size = 4096
    self._run_and_report_benchmark()

  def benchmark_8x8_tpu_bf16(self):
    """Test Keras model with 8x8 TPU, bf16."""
    self._setup()

    FLAGS.dtype = 'bf16'
    FLAGS.distribution_strategy = 'tpu'
    FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu_bf16')
    FLAGS.batch_size = 8192
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    super(Resnet50KerasBenchmarkBase, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)
class Resnet50KerasBenchmarkSynth(KerasClassifierBenchmarkBase):
  """Resnet50 synthetic benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs):
    def_flags = {}
    def_flags['log_steps'] = 10

    super(Resnet50KerasBenchmarkSynth, self).__init__(
        model='resnet',
        output_dir=output_dir,
        default_flags=def_flags,
        tpu=tpu,
        dataset_builder='synthetic',
        train_epochs=1,
        train_steps=110)


class Resnet50KerasBenchmarkReal(KerasClassifierBenchmarkBase):
  """Resnet50 real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs):
    data_dir = os.path.join(root_data_dir, 'imagenet')
    def_flags = {}
    def_flags['log_steps'] = 10

    super(Resnet50KerasBenchmarkReal, self).__init__(
        model='resnet',
        output_dir=output_dir,
        default_flags=def_flags,
        tpu=tpu,
        dataset_builder='records',
        train_epochs=1,
        train_steps=110,
        data_dir=data_dir)
class EfficientNetKerasBenchmarkReal(KerasClassifierBenchmarkBase):
  """EfficientNet real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs):
    data_dir = os.path.join(root_data_dir, 'imagenet')
    def_flags = {}
    def_flags['log_steps'] = 10

    super(EfficientNetKerasBenchmarkReal, self).__init__(
        model='efficientnet',
        output_dir=output_dir,
        default_flags=def_flags,
        tpu=tpu,
        dataset_builder='records',
        train_epochs=1,
        train_steps=110,
        data_dir=data_dir)

  def benchmark_2x2_tpu_b7_bf16(self):
    self._setup()
    self._run_and_report_benchmark(
        experiment_name='benchmark_b7_2x2_tpu_bf16',
        model_variant='efficientnet-b7',
        dtype='bfloat16',
        num_tpus=8,
        distribution_strategy='tpu',
        per_replica_batch_size=128)
class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
  """Resnet50 real data (stored in remote storage) benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    # Defining multiple epochs overrides the train_steps setting in benchmarks.
    def_flags['train_epochs'] = 2
    # Cache dataset so performance is stable after the first epoch.
    def_flags['training_dataset_cache'] = True
    def_flags['log_steps'] = 100
    # Note that for single GPU and pure eager tests which are less likely to be
    # input bound and more stable, these tests will run for shorter time by
    # overriding FLAGS.train_epochs, train_steps, log_steps in benchmark
    # methods, and skip_steps in _run_and_report_benchmark().

    super(Resnet50KerasBenchmarkRemoteData, self).__init__(
        output_dir=output_dir, default_flags=def_flags)

  def _override_flags_to_run_test_shorter(self):
    FLAGS.train_epochs = 1
    FLAGS.train_steps = 300
    FLAGS.log_steps = 10

  def benchmark_1_gpu_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()
  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.batch_size = 64
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked(self):
    """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.explicit_gpu_placement = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked')
    FLAGS.batch_size = 64
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
    """Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 128
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked(self):
    """Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.explicit_gpu_placement = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 128
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()
  def benchmark_1_gpu(self):
    """Test Keras model with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
    FLAGS.batch_size = 128
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_dynamic(self):
    """Test Keras model with 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16(self):
    """Test Keras model with XLA, 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._override_flags_to_run_test_shorter()
    self._run_and_report_benchmark()
  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    if FLAGS.num_gpus == 1 or FLAGS.run_eagerly:
      # For single GPU and pure eager tests which are less likely to be input
      # bound and more stable, run for shorter time and use the default
      # skip_steps.
      skip_steps = None
    else:
      # Skip the first epoch for performance measurement.
      skip_steps = 600
    super(Resnet50KerasBenchmarkRemoteData,
          self)._run_and_report_benchmark(skip_steps=skip_steps)
class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
  """Trivial model with real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]

    def_flags = {}
    def_flags['use_trivial_model'] = True
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['dtype'] = 'fp16'
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 600
    def_flags['log_steps'] = 100
    def_flags['distribution_strategy'] = 'mirrored'

    super(TrivialKerasBenchmarkReal, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=def_flags)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = resnet_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(TrivialKerasBenchmarkReal, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_8_gpu_warmup(self):
    """Dummy test that runs over an epoch to warmup the machine."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_warmup')
    FLAGS.batch_size = 256 * 8
    FLAGS.train_steps = 700
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    super(TrivialKerasBenchmarkReal, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)
class Resnet50MultiWorkerKerasAccuracy(keras_benchmark.KerasBenchmark):
  """Resnet50 distributed accuracy tests with multiple workers."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    flag_methods = [classifier_trainer.define_imagenet_keras_flags]
    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50MultiWorkerKerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def _benchmark_common(self, eager, num_workers, all_reduce_alg):
    """Common to all benchmarks in this class."""
    self._setup()
    num_gpus = 8
    FLAGS.num_gpus = num_gpus
    FLAGS.data_dir = self.data_dir
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = eager
    FLAGS.enable_xla = False
    FLAGS.distribution_strategy = 'multi_worker_mirrored'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 32
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_{}_8_gpu_{}_worker_fp16_{}_tweaked'.format(
            'eager' if eager else 'graph', num_workers, all_reduce_alg))
    FLAGS.batch_size = 256 * num_gpus * num_workers
    FLAGS.all_reduce_alg = all_reduce_alg

    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                top_1_min=MIN_TOP_1_ACCURACY,
                                top_1_max=MAX_TOP_1_ACCURACY):
    start_time_sec = time.time()
    stats = classifier_trainer.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50MultiWorkerKerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=top_1_min,
        top_1_max=top_1_max,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)

  def _get_model_dir(self, folder_name):
    return os.path.join(self.output_dir, folder_name)

  def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='nccl')

  def benchmark_eager_8_gpu_8_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_8_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='nccl')
class Resnet50MultiWorkerKerasBenchmark(Resnet50KerasBenchmarkBase):
  """Resnet50 distributed benchmark tests with multiple workers."""

  def __init__(self, output_dir=None, default_flags=None):
    super(Resnet50MultiWorkerKerasBenchmark, self).__init__(
        output_dir=output_dir, default_flags=default_flags)

  def _benchmark_common(self, eager, num_workers, all_reduce_alg):
    """Common to all benchmarks in this class."""
    self._setup()
    num_gpus = 8
    FLAGS.num_gpus = num_gpus
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = eager
    FLAGS.enable_xla = False
    FLAGS.distribution_strategy = 'multi_worker_mirrored'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 32
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_{}_8_gpu_{}_worker_fp16_{}_tweaked'.format(
            'eager' if eager else 'graph', num_workers, all_reduce_alg))
    FLAGS.batch_size = 256 * num_gpus * num_workers
    FLAGS.all_reduce_alg = all_reduce_alg

    self._run_and_report_benchmark()

  def benchmark_eager_8_gpu_1_worker_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 1 worker, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_1_worker_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 1 worker, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='nccl')

  def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='nccl')

  def benchmark_eager_8_gpu_8_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_8_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='nccl')
class Resnet50MultiWorkerKerasBenchmarkSynth(Resnet50MultiWorkerKerasBenchmark):
  """Resnet50 multi-worker synthetic data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['use_synthetic_data'] = True
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10

    super(Resnet50MultiWorkerKerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=def_flags)


class Resnet50MultiWorkerKerasBenchmarkReal(Resnet50MultiWorkerKerasBenchmark):
  """Resnet50 multi-worker real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10

    super(Resnet50MultiWorkerKerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=def_flags)
# TODO(kimjaehong): This should also cover other model optimization methods.
# When it does, this class will be renamed to something like
# 'KerasModelOptimizationAccuracyBase'.
class KerasPruningAccuracyBase(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for pruning method."""

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):
    """An accuracy benchmark class for the pruning method.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      default_flags: default flags
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    if default_flags is None:
      default_flags = {}
    default_flags['pruning_method'] = 'polynomial_decay'
    default_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')

    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]

    super(KerasPruningAccuracyBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags,
        **kwargs)

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 32 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(
      self,
      top_1_min=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
          'RESNET50_FINETUNE_PRUNING'][0],
      top_1_max=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
          'RESNET50_FINETUNE_PRUNING'][1]):
    start_time_sec = time.time()
    stats = resnet_imagenet_main.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(KerasPruningAccuracyBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=top_1_min,
        top_1_max=top_1_max,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)
class MobilenetV1KerasPruningAccuracy(KerasPruningAccuracyBase):
  """Benchmark accuracy tests for MobilenetV1 with pruning method."""

  def __init__(self, root_data_dir=None, **kwargs):
    default_flags = {
        'model': 'mobilenet',
        'optimizer': 'mobilenet_default',
        'initial_learning_rate_per_sample': 0.00007,
        'pretrained_filepath': tf.train.latest_checkpoint(
            os.path.join(root_data_dir, 'mobilenet_v1')),
        'pruning_begin_step': 0,
        'pruning_end_step': 100000,
        'pruning_initial_sparsity': 0.0,
        'pruning_final_sparsity': 0.5,
        'pruning_frequency': 100,
    }
    super(MobilenetV1KerasPruningAccuracy, self).__init__(
        root_data_dir=root_data_dir,
        default_flags=default_flags,
        **kwargs)

  def _run_and_report_benchmark(self):
    super(MobilenetV1KerasPruningAccuracy, self)._run_and_report_benchmark(
        top_1_min=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
            'MOBILENET_V1_FINETUNE_PRUNING'][0],
        top_1_max=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
            'MOBILENET_V1_FINETUNE_PRUNING'][1])
class Resnet50KerasPruningAccuracy(KerasPruningAccuracyBase):
  """Benchmark accuracy tests for resnet50 with pruning method."""

  def __init__(self, root_data_dir=None, **kwargs):
    default_flags = {
        'model': 'resnet50_v1.5',
        'optimizer': 'mobilenet_default',
        'initial_learning_rate_per_sample': 0.0000039,
        'pretrained_filepath': tf.train.latest_checkpoint(
            os.path.join(root_data_dir, 'resnet50')),
        'pruning_begin_step': 0,
        'pruning_end_step': 50000,
        'pruning_initial_sparsity': 0.0,
        'pruning_final_sparsity': 0.5,
        'pruning_frequency': 100,
    }
    super(Resnet50KerasPruningAccuracy, self).__init__(
        root_data_dir=root_data_dir,
        default_flags=default_flags,
        **kwargs)

  def _run_and_report_benchmark(self):
    super(Resnet50KerasPruningAccuracy, self)._run_and_report_benchmark(
        top_1_min=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
            'RESNET50_FINETUNE_PRUNING'][0],
        top_1_max=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
            'RESNET50_FINETUNE_PRUNING'][1])
class KerasPruningBenchmarkRealBase(Resnet50KerasBenchmarkBase):
  """Pruning method benchmarks."""

  def __init__(self, root_data_dir=None, default_flags=None, **kwargs):
    if default_flags is None:
      default_flags = {}
    default_flags.update({
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'train_steps': 110,
        'log_steps': 10,
        'pruning_method': 'polynomial_decay',
        'pruning_begin_step': 0,
        'pruning_end_step': 50000,
        'pruning_initial_sparsity': 0,
        'pruning_final_sparsity': 0.5,
        'pruning_frequency': 100,
    })
    super(KerasPruningBenchmarkRealBase, self).__init__(
        default_flags=default_flags, **kwargs)


class MobilenetV1KerasPruningBenchmarkReal(KerasPruningBenchmarkRealBase):
  """Pruning method benchmarks for MobilenetV1."""

  def __init__(self, **kwargs):
    default_flags = {
        'model': 'mobilenet',
        'optimizer': 'mobilenet_default',
    }
    super(MobilenetV1KerasPruningBenchmarkReal, self).__init__(
        default_flags=default_flags, **kwargs)


class Resnet50KerasPruningBenchmarkReal(KerasPruningBenchmarkRealBase):
  """Pruning method benchmarks for resnet50."""

  def __init__(self, **kwargs):
    default_flags = {
        'model': 'resnet50_v1.5',
        'optimizer': 'mobilenet_default',
    }
    super(Resnet50KerasPruningBenchmarkReal, self).__init__(
        default_flags=default_flags, **kwargs)
class KerasClusteringAccuracyBase(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for clustering method."""

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):
    """An accuracy benchmark class for the clustering method.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      default_flags: default flags
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    if default_flags is None:
      default_flags = {}
    default_flags['clustering_method'] = 'selective_clustering'
    default_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    default_flags['model'] = 'mobilenet_pretrained'
    default_flags['optimizer'] = 'mobilenet_fine_tune'

    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]

    super(KerasClusteringAccuracyBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags,
        **kwargs)

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 32 * 8
    FLAGS.train_epochs = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(
      self,
      top_1_min=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
          'MOBILENET_V1_FINETUNE_CLUSTERING'][0],
      top_1_max=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
          'MOBILENET_V1_FINETUNE_CLUSTERING'][1]):
    start_time_sec = time.time()
    stats = resnet_imagenet_main.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(KerasClusteringAccuracyBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=top_1_min,
        top_1_max=top_1_max,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)
class MobilenetV1KerasClusteringAccuracy(KerasClusteringAccuracyBase):
  """Benchmark accuracy tests for MobilenetV1 with clustering method."""

  def __init__(self, root_data_dir=None, **kwargs):
    default_flags = {
        'model': 'mobilenet_pretrained',
        'optimizer': 'mobilenet_fine_tune',
    }
    super(MobilenetV1KerasClusteringAccuracy, self).__init__(
        root_data_dir=root_data_dir,
        default_flags=default_flags,
        **kwargs)

  def _run_and_report_benchmark(self):
    super(MobilenetV1KerasClusteringAccuracy, self)._run_and_report_benchmark(
        top_1_min=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
            'MOBILENET_V1_FINETUNE_CLUSTERING'][0],
        top_1_max=MODEL_OPTIMIZATION_TOP_1_ACCURACY[
            'MOBILENET_V1_FINETUNE_CLUSTERING'][1])


class KerasClusteringBenchmarkRealBase(Resnet50KerasBenchmarkBase):
  """Clustering method benchmarks."""

  def __init__(self, root_data_dir=None, default_flags=None, **kwargs):
    if default_flags is None:
      default_flags = {}
    default_flags.update({
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'clustering_method': 'selective_clustering',
        'train_steps': 110,
        'log_steps': 10,
    })
    super(KerasClusteringBenchmarkRealBase, self).__init__(
        default_flags=default_flags, **kwargs)


class MobilenetV1KerasClusteringBenchmarkReal(KerasClusteringBenchmarkRealBase):
  """Clustering method benchmarks for MobilenetV1."""

  def __init__(self, **kwargs):
    default_flags = {
        'model': 'mobilenet_pretrained',
        'optimizer': 'mobilenet_fine_tune',
    }
    super(MobilenetV1KerasClusteringBenchmarkReal, self).__init__(
        default_flags=default_flags, **kwargs)
if __name__ == '__main__':
  tf.test.main()
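# Illustrative usage sketch (not part of the original file). These benchmark
# classes are normally driven by PerfZero or by the TensorFlow benchmark
# runner; assuming the standard `--benchmarks` regex flag of tf.test's
# benchmark framework is available, a single synthetic-data method could be
# selected roughly like this:
#
#   python3 keras_imagenet_benchmark.py \
#     --benchmarks=Resnet50KerasBenchmarkSynth.benchmark_1_gpu
#
# The exact invocation depends on the harness; treat the flag name and entry
# point above as assumptions, not as a documented interface.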
models-2.13.1/official/benchmark/models/__init__.py  0 → 100644
models-2.13.1/official/benchmark/models/cifar_preprocessing.py  0 → 100644
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to Cifar-10 dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from absl import logging
import tensorflow as tf
from official.legacy.image_classification.resnet import imagenet_preprocessing

HEIGHT = 32
WIDTH = 32
NUM_CHANNELS = 3
_DEFAULT_IMAGE_BYTES = HEIGHT * WIDTH * NUM_CHANNELS
# The record is the image plus a one-byte label
_RECORD_BYTES = _DEFAULT_IMAGE_BYTES + 1

# TODO(tobyboyd): Change to best practice 45K(train)/5K(val)/10K(test) splits.
NUM_IMAGES = {
    'train': 50000,
    'validation': 10000,
}
_NUM_DATA_FILES = 5
NUM_CLASSES = 10
def parse_record(raw_record, is_training, dtype):
  """Parses a record containing a training example of an image.

  The input record is parsed into a label and image, and the image is passed
  through preprocessing steps (cropping, flipping, and so on).

  This method converts the label to one hot to fit the loss function.

  Args:
    raw_record: scalar Tensor tf.string containing a serialized Example
      protocol buffer.
    is_training: A boolean denoting whether the input is for training.
    dtype: Data type to use for input images.

  Returns:
    Tuple with processed image tensor and one-hot-encoded label tensor.
  """
  # Convert bytes to a vector of uint8 that is record_bytes long.
  record_vector = tf.io.decode_raw(raw_record, tf.uint8)

  # The first byte represents the label, which we convert from uint8 to int32
  # and then to one-hot.
  label = tf.cast(record_vector[0], tf.int32)

  # The remaining bytes after the label represent the image, which we reshape
  # from [depth * height * width] to [depth, height, width].
  depth_major = tf.reshape(record_vector[1:_RECORD_BYTES],
                           [NUM_CHANNELS, HEIGHT, WIDTH])

  # Convert from [depth, height, width] to [height, width, depth], and cast as
  # float32.
  image = tf.cast(
      tf.transpose(a=depth_major, perm=[1, 2, 0]), tf.float32)

  image = preprocess_image(image, is_training)
  image = tf.cast(image, dtype)

  return image, label
def preprocess_image(image, is_training):
  """Preprocess a single image of layout [height, width, depth]."""
  if is_training:
    # Resize the image to add four extra pixels on each side.
    image = tf.image.resize_with_crop_or_pad(image, HEIGHT + 8, WIDTH + 8)

    # Randomly crop a [HEIGHT, WIDTH] section of the image.
    image = tf.image.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])

    # Randomly flip the image horizontally.
    image = tf.image.random_flip_left_right(image)

  # Subtract off the mean and divide by the variance of the pixels.
  image = tf.image.per_image_standardization(image)
  return image
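# Illustrative sketch (not part of the original file): applying the training
# pipeline above to a random CIFAR-sized image. The shapes come from the
# module constants; the random input is an assumption used only for
# demonstration.
#
#   example = tf.random.uniform([HEIGHT, WIDTH, NUM_CHANNELS], 0, 255)
#   augmented = preprocess_image(example, is_training=True)
#   # augmented has shape [32, 32, 3] with per-image-standardized values.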
def get_filenames(is_training, data_dir):
  """Returns a list of filenames."""
  assert tf.io.gfile.exists(data_dir), (
      'Run cifar10_download_and_extract.py first to download and extract the '
      'CIFAR-10 data.')

  if is_training:
    return [
        os.path.join(data_dir, 'data_batch_%d.bin' % i)
        for i in range(1, _NUM_DATA_FILES + 1)
    ]
  else:
    return [os.path.join(data_dir, 'test_batch.bin')]
def input_fn(is_training,
             data_dir,
             batch_size,
             dtype=tf.float32,
             datasets_num_private_threads=None,
             parse_record_fn=parse_record,
             input_context=None,
             drop_remainder=False):
  """Input function which provides batches for train or eval.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    dtype: Data type to use for images/features.
    datasets_num_private_threads: Number of private threads for tf.data.
    parse_record_fn: Function to use for parsing the records.
    input_context: A `tf.distribute.InputContext` object passed in by
      `tf.distribute.Strategy`.
    drop_remainder: A boolean indicating whether to drop the remainder of the
      batches. If True, the batch dimension will be static.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

  if input_context:
    logging.info(
        'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
        input_context.input_pipeline_id, input_context.num_input_pipelines)
    dataset = dataset.shard(input_context.num_input_pipelines,
                            input_context.input_pipeline_id)

  return imagenet_preprocessing.process_record_dataset(
      dataset=dataset,
      is_training=is_training,
      batch_size=batch_size,
      shuffle_buffer=NUM_IMAGES['train'],
      parse_record_fn=parse_record_fn,
      dtype=dtype,
      datasets_num_private_threads=datasets_num_private_threads,
      drop_remainder=drop_remainder)
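# Illustrative usage sketch (not part of the original file): building a
# training dataset from an already-extracted CIFAR-10 binary directory. The
# path below matches the flag default set in resnet_cifar_main.py and is an
# assumed example location.
#
#   train_ds = input_fn(
#       is_training=True,
#       data_dir='/tmp/cifar10_data/cifar-10-batches-bin',
#       batch_size=128,
#       drop_remainder=True)
#   images, labels = next(iter(train_ds))  # images: [128, 32, 32, 3]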
models-2.13.1/official/benchmark/models/resnet_cifar_main.py  0 → 100644
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the Cifar-10 dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Import libraries
from absl import app
from absl import flags
from absl import logging
import numpy as np
import tensorflow as tf
from official.benchmark.models import cifar_preprocessing
from official.benchmark.models import resnet_cifar_model
from official.benchmark.models import synthetic_util
from official.common import distribute_utils
from official.legacy.image_classification.resnet import common
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils

LR_SCHEDULE = [  # (multiplier, epoch to start) tuples
    (0.1, 91), (0.01, 136), (0.001, 182)
]
def learning_rate_schedule(current_epoch,
                           current_batch,
                           batches_per_epoch,
                           batch_size):
  """Handles linear scaling rule and LR decay.

  Scale learning rate at epoch boundaries provided in LR_SCHEDULE by the
  provided scaling factor.

  Args:
    current_epoch: integer, current epoch indexed from 0.
    current_batch: integer, current batch in the current epoch, indexed from 0.
    batches_per_epoch: integer, number of steps in an epoch.
    batch_size: integer, total batch size.

  Returns:
    Adjusted learning rate.
  """
  del current_batch, batches_per_epoch  # not used
  initial_learning_rate = common.BASE_LEARNING_RATE * batch_size / 128
  learning_rate = initial_learning_rate
  for mult, start_epoch in LR_SCHEDULE:
    if current_epoch >= start_epoch:
      learning_rate = initial_learning_rate * mult
    else:
      break
  return learning_rate
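# Illustrative sketch (not part of the original file): with batch_size=128 the
# base rate is unscaled, so the schedule above yields
#   learning_rate_schedule(0, 0, 390, 128)   -> BASE_LEARNING_RATE
#   learning_rate_schedule(91, 0, 390, 128)  -> BASE_LEARNING_RATE * 0.1
#   learning_rate_schedule(136, 0, 390, 128) -> BASE_LEARNING_RATE * 0.01
#   learning_rate_schedule(182, 0, 390, 128) -> BASE_LEARNING_RATE * 0.001
# (390 batches_per_epoch is just an example value; the argument is unused.)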
class LearningRateBatchScheduler(tf.keras.callbacks.Callback):
  """Callback to update learning rate on every batch (not epoch boundaries).

  N.B. Only support Keras optimizers, not TF optimizers.

  Attributes:
    schedule: a function that takes an epoch index and a batch index as input
      (both integer, indexed from 0) and returns a new learning rate as
      output (float).
  """

  def __init__(self, schedule, batch_size, steps_per_epoch):
    super(LearningRateBatchScheduler, self).__init__()
    self.schedule = schedule
    self.steps_per_epoch = steps_per_epoch
    self.batch_size = batch_size
    self.epochs = -1
    self.prev_lr = -1

  def on_epoch_begin(self, epoch, logs=None):
    if not hasattr(self.model.optimizer, 'learning_rate'):
      raise ValueError('Optimizer must have a "learning_rate" attribute.')
    self.epochs += 1

  def on_batch_begin(self, batch, logs=None):
    """Executes before step begins."""
    lr = self.schedule(self.epochs,
                       batch,
                       self.steps_per_epoch,
                       self.batch_size)
    if not isinstance(lr, (float, np.float32, np.float64)):
      raise ValueError('The output of the "schedule" function should be float.')
    if lr != self.prev_lr:
      self.model.optimizer.learning_rate = lr  # lr should be a float here
      self.prev_lr = lr
      logging.debug('Epoch %05d Batch %05d: LearningRateBatchScheduler '
                    'change learning rate to %s.', self.epochs, batch, lr)
def run(flags_obj):
  """Run ResNet Cifar-10 training and eval loop using native Keras APIs.

  Args:
    flags_obj: An object containing parsed flag values.

  Raises:
    ValueError: If fp16 is passed as it is not currently supported.

  Returns:
    Dictionary of training and eval stats.
  """
  keras_utils.set_session_config(enable_xla=flags_obj.enable_xla)

  # Execute flag override logic for better model performance
  if flags_obj.tf_gpu_thread_mode:
    keras_utils.set_gpu_thread_mode_and_count(
        per_gpu_thread_count=flags_obj.per_gpu_thread_count,
        gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
        num_gpus=flags_obj.num_gpus,
        datasets_num_private_threads=flags_obj.datasets_num_private_threads)
  common.set_cudnn_batchnorm_mode()

  dtype = flags_core.get_tf_dtype(flags_obj)
  if dtype == 'fp16':
    raise ValueError('dtype fp16 is not supported in Keras. Use the default '
                     'value(fp32).')

  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first'
                   if tf.config.list_physical_devices('GPU')
                   else 'channels_last')
  tf.keras.backend.set_image_data_format(data_format)

  strategy = distribute_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus,
      all_reduce_alg=flags_obj.all_reduce_alg,
      num_packs=flags_obj.num_packs)

  if strategy:
    # flags_obj.enable_get_next_as_optional controls whether enabling
    # get_next_as_optional behavior in DistributedIterator. If true, last
    # partial batch can be supported.
    strategy.extended.experimental_enable_get_next_as_optional = (
        flags_obj.enable_get_next_as_optional)

  strategy_scope = distribute_utils.get_strategy_scope(strategy)

  if flags_obj.use_synthetic_data:
    synthetic_util.set_up_synthetic_data()
    input_fn = common.get_synth_input_fn(
        height=cifar_preprocessing.HEIGHT,
        width=cifar_preprocessing.WIDTH,
        num_channels=cifar_preprocessing.NUM_CHANNELS,
        num_classes=cifar_preprocessing.NUM_CLASSES,
        dtype=flags_core.get_tf_dtype(flags_obj),
        drop_remainder=True)
  else:
    synthetic_util.undo_set_up_synthetic_data()
    input_fn = cifar_preprocessing.input_fn

  train_input_dataset = input_fn(
      is_training=True,
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      parse_record_fn=cifar_preprocessing.parse_record,
      datasets_num_private_threads=flags_obj.datasets_num_private_threads,
      dtype=dtype,
      # Setting drop_remainder to avoid the partial batch logic in
      # normalization layer, which triggers tf.where and leads to extra memory
      # copy of input sizes between host and GPU.
      drop_remainder=(not flags_obj.enable_get_next_as_optional))

  eval_input_dataset = None
  if not flags_obj.skip_eval:
    eval_input_dataset = input_fn(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        parse_record_fn=cifar_preprocessing.parse_record)

  steps_per_epoch = (
      cifar_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
  lr_schedule = 0.1
  if flags_obj.use_tensor_lr:
    initial_learning_rate = (
        common.BASE_LEARNING_RATE * flags_obj.batch_size / 128)
    lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        boundaries=list(p[1] * steps_per_epoch for p in LR_SCHEDULE),
        values=[initial_learning_rate] +
        list(p[0] * initial_learning_rate for p in LR_SCHEDULE))

  with strategy_scope:
    optimizer = common.get_optimizer(lr_schedule)
    model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=(['sparse_categorical_accuracy']
                 if flags_obj.report_accuracy_metrics else None),
        run_eagerly=flags_obj.run_eagerly)

  train_epochs = flags_obj.train_epochs

  callbacks = common.get_callbacks()

  if not flags_obj.use_tensor_lr:
    lr_callback = LearningRateBatchScheduler(
        schedule=learning_rate_schedule,
        batch_size=flags_obj.batch_size,
        steps_per_epoch=steps_per_epoch)
    callbacks.append(lr_callback)

  # If multiple epochs, ignore the train_steps flag.
  if train_epochs <= 1 and flags_obj.train_steps:
    steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
    train_epochs = 1

  num_eval_steps = (
      cifar_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size)

  validation_data = eval_input_dataset
  if flags_obj.skip_eval:
    if flags_obj.set_learning_phase_to_train:
      # TODO(haoyuzhang): Understand slowdown of setting learning phase when
      # not using distribution strategy.
      tf.keras.backend.set_learning_phase(1)
    num_eval_steps = None
    validation_data = None

  if not strategy and flags_obj.explicit_gpu_placement:
    # TODO(b/135607227): Add device scope automatically in Keras training loop
    # when not using distribution strategy.
    no_dist_strat_device = tf.device('/device:GPU:0')
    no_dist_strat_device.__enter__()

  history = model.fit(train_input_dataset,
                      epochs=train_epochs,
                      steps_per_epoch=steps_per_epoch,
                      callbacks=callbacks,
                      validation_steps=num_eval_steps,
                      validation_data=validation_data,
                      validation_freq=flags_obj.epochs_between_evals,
                      verbose=2)
  eval_output = None
  if not flags_obj.skip_eval:
    eval_output = model.evaluate(eval_input_dataset,
                                 steps=num_eval_steps,
                                 verbose=2)

  if not strategy and flags_obj.explicit_gpu_placement:
    no_dist_strat_device.__exit__()

  stats = common.build_stats(history, eval_output, callbacks)
  return stats
def define_cifar_flags():
  common.define_keras_flags()

  flags_core.set_defaults(data_dir='/tmp/cifar10_data/cifar-10-batches-bin',
                          model_dir='/tmp/cifar10_model',
                          epochs_between_evals=10,
                          batch_size=128)


def main(_):
  return run(flags.FLAGS)


if __name__ == '__main__':
  logging.set_verbosity(logging.INFO)
  define_cifar_flags()
  app.run(main)
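# Illustrative usage sketch (not part of the original file): a single-GPU run
# with the flag defaults set above. The flags shown come from the shared Keras
# flag definitions; the epoch count is only an example chosen to cover the
# full LR_SCHEDULE, so treat the exact values as assumptions.
#
#   python3 resnet_cifar_main.py \
#     --data_dir=/tmp/cifar10_data/cifar-10-batches-bin \
#     --num_gpus=1 --batch_size=128 --train_epochs=182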
models-2.13.1/official/benchmark/models/resnet_cifar_model.py  0 → 100644
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ResNet56 model for Keras adapted from tf.keras.applications.ResNet50.
# Reference:
- [Deep Residual Learning for Image Recognition](
https://arxiv.org/abs/1512.03385)
Adapted from code contributed by BigMoyan.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

import tensorflow as tf

BATCH_NORM_DECAY = 0.997
BATCH_NORM_EPSILON = 1e-5
L2_WEIGHT_DECAY = 2e-4
def identity_building_block(input_tensor,
                            kernel_size,
                            filters,
                            stage,
                            block,
                            training=None):
  """The identity block is the block that has no conv layer at shortcut.

  Arguments:
    input_tensor: input tensor
    kernel_size: default 3, the kernel size of middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    block: current block label, used for generating layer names
    training: Only used if training keras model with Estimator. In other
      scenarios it is handled automatically.

  Returns:
    Output tensor for the block.
  """
  filters1, filters2 = filters
  if tf.keras.backend.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

  x = tf.keras.layers.Conv2D(
      filters1,
      kernel_size,
      padding='same',
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=tf.keras.regularizers.L2(L2_WEIGHT_DECAY),
      name=conv_name_base + '2a')(input_tensor)
  x = tf.keras.layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '2a')(x, training=training)
  x = tf.keras.layers.Activation('relu')(x)

  x = tf.keras.layers.Conv2D(
      filters2,
      kernel_size,
      padding='same',
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=tf.keras.regularizers.L2(L2_WEIGHT_DECAY),
      name=conv_name_base + '2b')(x)
  x = tf.keras.layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '2b')(x, training=training)

  x = tf.keras.layers.add([x, input_tensor])
  x = tf.keras.layers.Activation('relu')(x)
  return x
def conv_building_block(input_tensor,
                        kernel_size,
                        filters,
                        stage,
                        block,
                        strides=(2, 2),
                        training=None):
  """A block that has a conv layer at shortcut.

  Arguments:
    input_tensor: input tensor
    kernel_size: default 3, the kernel size of middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    block: current block label, used for generating layer names
    strides: Strides for the first conv layer in the block.
    training: Only used if training keras model with Estimator. In other
      scenarios it is handled automatically.

  Returns:
    Output tensor for the block.

  Note that from stage 3, the first conv layer at main path is with
  strides=(2, 2), and the shortcut should have strides=(2, 2) as well.
  """
  filters1, filters2 = filters
  if tf.keras.backend.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

  x = tf.keras.layers.Conv2D(
      filters1,
      kernel_size,
      strides=strides,
      padding='same',
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=tf.keras.regularizers.L2(L2_WEIGHT_DECAY),
      name=conv_name_base + '2a')(input_tensor)
  x = tf.keras.layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '2a')(x, training=training)
  x = tf.keras.layers.Activation('relu')(x)

  x = tf.keras.layers.Conv2D(
      filters2,
      kernel_size,
      padding='same',
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=tf.keras.regularizers.L2(L2_WEIGHT_DECAY),
      name=conv_name_base + '2b')(x)
  x = tf.keras.layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '2b')(x, training=training)

  shortcut = tf.keras.layers.Conv2D(
      filters2, (1, 1),
      strides=strides,
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=tf.keras.regularizers.L2(L2_WEIGHT_DECAY),
      name=conv_name_base + '1')(input_tensor)
  shortcut = tf.keras.layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '1')(shortcut, training=training)

  x = tf.keras.layers.add([x, shortcut])
  x = tf.keras.layers.Activation('relu')(x)
  return x
def resnet_block(input_tensor,
                 size,
                 kernel_size,
                 filters,
                 stage,
                 conv_strides=(2, 2),
                 training=None):
  """A block which applies conv followed by multiple identity blocks.

  Arguments:
    input_tensor: input tensor
    size: integer, number of constituent conv/identity building blocks. A conv
      block is applied once, followed by (size - 1) identity blocks.
    kernel_size: default 3, the kernel size of middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    conv_strides: Strides for the first conv layer in the block.
    training: Only used if training keras model with Estimator. In other
      scenarios it is handled automatically.

  Returns:
    Output tensor after applying conv and identity blocks.
  """
  x = conv_building_block(input_tensor, kernel_size, filters, stage=stage,
                          strides=conv_strides, block='block_0',
                          training=training)
  for i in range(size - 1):
    x = identity_building_block(x, kernel_size, filters, stage=stage,
                                block='block_%d' % (i + 1), training=training)
  return x
def resnet(num_blocks, classes=10, training=None):
  """Instantiates the ResNet architecture.

  Arguments:
    num_blocks: integer, the number of conv/identity blocks in each stage. The
      ResNet contains 3 stages, each containing one conv block followed by
      (num_blocks - 1) identity blocks. Each conv/identity block has 2
      convolutional layers. With the input convolutional layer and the pooling
      layer towards the end, this brings the total size of the network to
      (6*num_blocks + 2).
    classes: optional number of classes to classify images into
    training: Only used if training keras model with Estimator. In other
      scenarios it is handled automatically.

  Returns:
    A Keras model instance.
  """
  input_shape = (32, 32, 3)
  img_input = tf.keras.Input(shape=input_shape)

  if tf.keras.backend.image_data_format() == 'channels_first':
    x = tf.keras.layers.Lambda(
        lambda x: tf.keras.backend.permute_dimensions(x, (0, 3, 1, 2)),
        name='transpose')(img_input)
    bn_axis = 1
  else:  # channels_last
    x = img_input
    bn_axis = 3

  x = tf.keras.layers.ZeroPadding2D(padding=(1, 1), name='conv1_pad')(x)
  x = tf.keras.layers.Conv2D(16, (3, 3),
                             strides=(1, 1),
                             padding='valid',
                             use_bias=False,
                             kernel_initializer='he_normal',
                             kernel_regularizer=tf.keras.regularizers.L2(L2_WEIGHT_DECAY),
                             name='conv1')(x)
  x = tf.keras.layers.BatchNormalization(axis=bn_axis,
                                         momentum=BATCH_NORM_DECAY,
                                         epsilon=BATCH_NORM_EPSILON,
                                         name='bn_conv1')(x, training=training)
  x = tf.keras.layers.Activation('relu')(x)

  x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[16, 16],
                   stage=2, conv_strides=(1, 1), training=training)

  x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[32, 32],
                   stage=3, conv_strides=(2, 2), training=training)

  x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[64, 64],
                   stage=4, conv_strides=(2, 2), training=training)

  if tf.keras.backend.image_data_format() == 'channels_last':
    rm_axes = [1, 2]
  else:
    rm_axes = [2, 3]
  x = tf.keras.layers.Lambda(
      lambda x: tf.keras.backend.mean(x, rm_axes), name='reduce_mean')(x)
  x = tf.keras.layers.Dense(
      classes,
      activation='softmax',
      kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
      kernel_regularizer=tf.keras.regularizers.L2(L2_WEIGHT_DECAY),
      bias_regularizer=tf.keras.regularizers.L2(L2_WEIGHT_DECAY),
      name='fc10')(x)

  inputs = img_input
  # Create model.
  model = tf.keras.models.Model(inputs, x, name='resnet56')

  return model


resnet20 = functools.partial(resnet, num_blocks=3)
resnet32 = functools.partial(resnet, num_blocks=5)
resnet56 = functools.partial(resnet, num_blocks=9)
resnet10 = functools.partial(resnet, num_blocks=110)
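The constructor above stacks a stem convolution, three resnet_block stages of widths 16/32/64, global average pooling, and a softmax head, so resnet56 (num_blocks=9) yields a 6*9 + 2 = 56-layer network. A minimal usage sketch follows; it assumes the file in this commit is importable as official.benchmark.models.resnet_cifar_model and that TensorFlow 2.x is installed.

# Hedged usage sketch for the CIFAR ResNet above; the import path is assumed
# from the file location shown in this commit.
import tensorflow as tf
from official.benchmark.models import resnet_cifar_model

model = resnet_cifar_model.resnet56(classes=10)   # functools.partial(resnet, num_blocks=9)
print(model.name)           # 'resnet56'
print(model.count_params()) # roughly 0.85M parameters for ResNet-56
model.summary()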
models-2.13.1/official/benchmark/models/resnet_cifar_test.py
0 → 100644
View file @
472e2f80
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with Cifar data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tempfile

import tensorflow as tf

from tensorflow.python.eager import context
from tensorflow.python.platform import googletest
from official.benchmark.models import cifar_preprocessing
from official.benchmark.models import resnet_cifar_main
from official.utils.testing import integration
class KerasCifarTest(googletest.TestCase):
  """Unit tests for Keras ResNet with Cifar."""

  _extra_flags = [
      "-batch_size", "4",
      "-train_steps", "1",
      "-use_synthetic_data", "true"
  ]
  _tempdir = None

  def get_temp_dir(self):
    if not self._tempdir:
      self._tempdir = tempfile.mkdtemp(dir=googletest.GetTempDir())
    return self._tempdir

  @classmethod
  def setUpClass(cls):  # pylint: disable=invalid-name
    super(KerasCifarTest, cls).setUpClass()
    resnet_cifar_main.define_cifar_flags()

  def setUp(self):
    super(KerasCifarTest, self).setUp()
    cifar_preprocessing.NUM_IMAGES["validation"] = 4

  def tearDown(self):
    super(KerasCifarTest, self).tearDown()
    tf.io.gfile.rmtree(self.get_temp_dir())

  def test_end_to_end_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    extra_flags = [
        "-distribution_strategy", "off",
        "-model_dir", "keras_cifar_no_dist_strat",
        "-data_format", "channels_last",
    ]
    extra_flags = extra_flags + self._extra_flags

    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_graph_no_dist_strat(self):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    extra_flags = [
        "-enable_eager", "false",
        "-distribution_strategy", "off",
        "-model_dir", "keras_cifar_graph_no_dist_strat",
        "-data_format", "channels_last",
    ]
    extra_flags = extra_flags + self._extra_flags

    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_1_gpu(self):
    """Test Keras model with 1 GPU."""
    if context.num_gpus() < 1:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(1, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "1",
        "-distribution_strategy", "mirrored",
        "-model_dir", "keras_cifar_1_gpu",
        "-data_format", "channels_last",
    ]
    extra_flags = extra_flags + self._extra_flags

    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_graph_1_gpu(self):
    """Test Keras model in legacy graph mode with 1 GPU."""
    if context.num_gpus() < 1:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(1, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "1",
        "-noenable_eager",
        "-distribution_strategy", "mirrored",
        "-model_dir", "keras_cifar_graph_1_gpu",
        "-data_format", "channels_last",
    ]
    extra_flags = extra_flags + self._extra_flags

    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_2_gpu(self):
    """Test Keras model with 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-distribution_strategy", "mirrored",
        "-model_dir", "keras_cifar_2_gpu",
    ]
    extra_flags = extra_flags + self._extra_flags

    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_graph_2_gpu(self):
    """Test Keras model in legacy graph mode with 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-enable_eager", "false",
        "-distribution_strategy", "mirrored",
        "-model_dir", "keras_cifar_graph_2_gpu",
    ]
    extra_flags = extra_flags + self._extra_flags

    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)


if __name__ == "__main__":
  googletest.main()
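The tests above drive the trainer entirely through absl flags and synthetic data. A standalone sketch of the same pattern is shown below; it is a hedged approximation of what integration.run_synthetic does, assuming resnet_cifar_main is importable and its flags have not already been parsed, and using "resnet_cifar_sketch" as a placeholder program name.

# Hedged sketch: drive the CIFAR trainer directly via absl flags with
# synthetic data, mirroring the flag lists used in the tests above.
from absl import flags
from official.benchmark.models import resnet_cifar_main

resnet_cifar_main.define_cifar_flags()
flags.FLAGS([
    "resnet_cifar_sketch",          # placeholder argv[0]
    "-batch_size", "4",
    "-train_steps", "1",
    "-use_synthetic_data", "true",
    "-distribution_strategy", "off",
])
stats = resnet_cifar_main.run(flags.FLAGS)
print(stats)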
models-2.13.1/official/benchmark/models/resnet_imagenet_main.py
0 → 100644
View file @
472e2f80
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""
import os

# Import libraries
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf

from official.common import distribute_utils
from official.legacy.image_classification import test_utils
from official.legacy.image_classification.resnet import common
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.legacy.image_classification.resnet import resnet_model
from official.modeling import performance
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
from official.utils.misc import model_helpers
def _cluster_last_three_conv2d_layers(model):
  """Helper method to cluster last three conv2d layers."""
  import tensorflow_model_optimization as tfmot  # pylint: disable=g-import-not-at-top

  last_three_conv2d_layers = [
      layer for layer in model.layers
      if isinstance(layer, tf.keras.layers.Conv2D)
  ][-3:]

  cluster_weights = tfmot.clustering.keras.cluster_weights
  centroid_initialization = tfmot.clustering.keras.CentroidInitialization

  def cluster_fn(layer):
    if layer not in last_three_conv2d_layers:
      return layer
    if (layer == last_three_conv2d_layers[0] or
        layer == last_three_conv2d_layers[1]):
      clustered = cluster_weights(
          layer,
          number_of_clusters=256,
          cluster_centroids_init=centroid_initialization.LINEAR)
      print('Clustered {} with 256 clusters'.format(layer.name))
    else:
      clustered = cluster_weights(
          layer,
          number_of_clusters=32,
          cluster_centroids_init=centroid_initialization.LINEAR)
      print('Clustered {} with 32 clusters'.format(layer.name))
    return clustered

  return tf.keras.models.clone_model(model, clone_function=cluster_fn)
def run(flags_obj):
  """Run ResNet ImageNet training and eval loop using native Keras APIs.

  Args:
    flags_obj: An object containing parsed flag values.

  Raises:
    ValueError: If fp16 is passed as it is not currently supported.
    NotImplementedError: If some features are not currently supported.

  Returns:
    Dictionary of training and eval stats.
  """
  # Execute flag override logic for better model performance
  if flags_obj.tf_gpu_thread_mode:
    keras_utils.set_gpu_thread_mode_and_count(
        per_gpu_thread_count=flags_obj.per_gpu_thread_count,
        gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
        num_gpus=flags_obj.num_gpus,
        datasets_num_private_threads=flags_obj.datasets_num_private_threads)
  common.set_cudnn_batchnorm_mode()

  dtype = flags_core.get_tf_dtype(flags_obj)
  performance.set_mixed_precision_policy(flags_core.get_tf_dtype(flags_obj))

  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first'
                   if tf.config.list_physical_devices('GPU') else 'channels_last')
  tf.keras.backend.set_image_data_format(data_format)

  # Configures cluster spec for distribution strategy.
  _ = distribute_utils.configure_cluster(flags_obj.worker_hosts,
                                         flags_obj.task_index)

  strategy = distribute_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus,
      all_reduce_alg=flags_obj.all_reduce_alg,
      num_packs=flags_obj.num_packs,
      tpu_address=flags_obj.tpu)

  if strategy:
    # flags_obj.enable_get_next_as_optional controls whether enabling
    # get_next_as_optional behavior in DistributedIterator. If true, the last
    # partial batch can be supported.
    strategy.extended.experimental_enable_get_next_as_optional = (
        flags_obj.enable_get_next_as_optional)

  strategy_scope = distribute_utils.get_strategy_scope(strategy)

  # pylint: disable=protected-access
  if flags_obj.use_synthetic_data:
    input_fn = common.get_synth_input_fn(
        height=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
        width=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
        num_channels=imagenet_preprocessing.NUM_CHANNELS,
        num_classes=imagenet_preprocessing.NUM_CLASSES,
        dtype=dtype,
        drop_remainder=True)
  else:
    input_fn = imagenet_preprocessing.input_fn

  # When `enable_xla` is True, we always drop the remainder of the batches
  # in the dataset, as XLA-GPU doesn't support dynamic shapes.
  drop_remainder = flags_obj.enable_xla

  # The resnet_model.resnet50 input format is always channels-last, whereas the
  # keras_applications MobileNet model's input format depends on the Keras
  # backend image data format. use_keras_image_data_format indicates whether
  # the image preprocessor output should match the Keras backend image data
  # format or simply stay channels-last.
  use_keras_image_data_format = (flags_obj.model == 'mobilenet' or
                                 flags_obj.model == 'mobilenet_pretrained')
  train_input_dataset = input_fn(
      is_training=True,
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      parse_record_fn=imagenet_preprocessing.get_parse_record_fn(
          use_keras_image_data_format=use_keras_image_data_format),
      datasets_num_private_threads=flags_obj.datasets_num_private_threads,
      dtype=dtype,
      drop_remainder=drop_remainder,
      tf_data_experimental_slack=flags_obj.tf_data_experimental_slack,
      training_dataset_cache=flags_obj.training_dataset_cache,
  )

  eval_input_dataset = None
  if not flags_obj.skip_eval:
    eval_input_dataset = input_fn(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        parse_record_fn=imagenet_preprocessing.get_parse_record_fn(
            use_keras_image_data_format=use_keras_image_data_format),
        dtype=dtype,
        drop_remainder=drop_remainder)

  lr_schedule = common.PiecewiseConstantDecayWithWarmup(
      batch_size=flags_obj.batch_size,
      epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
      warmup_epochs=common.LR_SCHEDULE[0][1],
      boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
      multipliers=list(p[0] for p in common.LR_SCHEDULE),
      compute_lr_on_cpu=True)
  steps_per_epoch = (
      imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
  with strategy_scope:
    if flags_obj.optimizer == 'resnet50_default':
      optimizer = common.get_optimizer(lr_schedule)
    elif (flags_obj.optimizer == 'mobilenet_default' or
          flags_obj.optimizer == 'mobilenet_fine_tune'):
      initial_learning_rate = (
          flags_obj.initial_learning_rate_per_sample * flags_obj.batch_size)
      if flags_obj.optimizer == 'mobilenet_fine_tune':
        initial_learning_rate = 1e-5
      optimizer = tf.keras.optimizers.SGD(
          learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
              initial_learning_rate,
              decay_steps=steps_per_epoch * flags_obj.num_epochs_per_decay,
              decay_rate=flags_obj.lr_decay_factor,
              staircase=True),
          momentum=0.9)

    optimizer = performance.configure_optimizer(
        optimizer,
        use_float16=flags_core.get_tf_dtype(flags_obj) == tf.float16,
        loss_scale=flags_core.get_loss_scale(flags_obj, default_for_fp16=128),
    )

    # TODO(hongkuny): Remove trivial model usage and move it to benchmark.
    if flags_obj.use_trivial_model:
      model = test_utils.trivial_model(imagenet_preprocessing.NUM_CLASSES)
    elif flags_obj.model == 'resnet50_v1.5':
      model = resnet_model.resnet50(num_classes=imagenet_preprocessing.NUM_CLASSES)
    elif flags_obj.model == 'mobilenet' or flags_obj.model == 'mobilenet_pretrained':
      # TODO(kimjaehong): Remove layers attribute when minimum TF version
      # support 2.0 layers by default.
      if flags_obj.model == 'mobilenet_pretrained':
        classes_labels = 1000
        initial_weights = 'imagenet'
      else:
        classes_labels = imagenet_preprocessing.NUM_CLASSES
        initial_weights = None
      model = tf.keras.applications.mobilenet.MobileNet(
          weights=initial_weights,
          classes=classes_labels,
          layers=tf.keras.layers)
    if flags_obj.pretrained_filepath:
      model.load_weights(flags_obj.pretrained_filepath)

    if flags_obj.pruning_method == 'polynomial_decay':
      import tensorflow_model_optimization as tfmot  # pylint: disable=g-import-not-at-top
      if dtype != tf.float32:
        raise NotImplementedError(
            'Pruning is currently only supported on dtype=tf.float32.')
      pruning_params = {
          'pruning_schedule':
              tfmot.sparsity.keras.PolynomialDecay(
                  initial_sparsity=flags_obj.pruning_initial_sparsity,
                  final_sparsity=flags_obj.pruning_final_sparsity,
                  begin_step=flags_obj.pruning_begin_step,
                  end_step=flags_obj.pruning_end_step,
                  frequency=flags_obj.pruning_frequency),
      }
      model = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params)
    elif flags_obj.pruning_method:
      raise NotImplementedError(
          'Only polynomial_decay is currently supported.')

    if flags_obj.clustering_method == 'selective_clustering':
      import tensorflow_model_optimization as tfmot  # pylint: disable=g-import-not-at-top
      if dtype != tf.float32:
        raise NotImplementedError(
            'Clustering is currently only supported on dtype=tf.float32.')
      model = _cluster_last_three_conv2d_layers(model)
    elif flags_obj.clustering_method:
      raise NotImplementedError('Only selective_clustering is implemented.')

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=(['sparse_categorical_accuracy']
                 if flags_obj.report_accuracy_metrics else None),
        run_eagerly=flags_obj.run_eagerly,
        jit_compile=flags_obj.enable_xla)
  train_epochs = flags_obj.train_epochs

  callbacks = common.get_callbacks(
      pruning_method=flags_obj.pruning_method,
      enable_checkpoint_and_export=flags_obj.enable_checkpoint_and_export,
      model_dir=flags_obj.model_dir)

  # If multiple epochs, ignore the train_steps flag.
  if train_epochs <= 1 and flags_obj.train_steps:
    steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
    train_epochs = 1

  num_eval_steps = (
      imagenet_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size)

  validation_data = eval_input_dataset
  if flags_obj.skip_eval:
    # Only build the training graph. This reduces memory usage introduced by
    # control flow ops in layers that have different implementations for
    # training and inference (e.g., batch norm).
    if flags_obj.set_learning_phase_to_train:
      # TODO(haoyuzhang): Understand slowdown of setting learning phase when
      # not using distribution strategy.
      tf.keras.backend.set_learning_phase(1)
    num_eval_steps = None
    validation_data = None

  if not strategy and flags_obj.explicit_gpu_placement:
    # TODO(b/135607227): Add device scope automatically in Keras training loop
    # when not using distribution strategy.
    no_dist_strat_device = tf.device('/device:GPU:0')
    no_dist_strat_device.__enter__()

  history = model.fit(train_input_dataset,
                      epochs=train_epochs,
                      steps_per_epoch=steps_per_epoch,
                      callbacks=callbacks,
                      validation_steps=num_eval_steps,
                      validation_data=validation_data,
                      validation_freq=flags_obj.epochs_between_evals,
                      verbose=2)

  eval_output = None
  if not flags_obj.skip_eval:
    eval_output = model.evaluate(eval_input_dataset,
                                 steps=num_eval_steps,
                                 verbose=2)

  if flags_obj.pruning_method:
    model = tfmot.sparsity.keras.strip_pruning(model)
  if flags_obj.clustering_method:
    model = tfmot.clustering.keras.strip_clustering(model)
  if flags_obj.enable_checkpoint_and_export:
    if dtype == tf.bfloat16:
      logging.warning('Keras model.save does not support bfloat16 dtype.')
    else:
      # Keras model.save assumes a float32 input signature.
      export_path = os.path.join(flags_obj.model_dir, 'saved_model')
      model.save(export_path, include_optimizer=False)

  if not strategy and flags_obj.explicit_gpu_placement:
    no_dist_strat_device.__exit__()

  stats = common.build_stats(history, eval_output, callbacks)
  return stats
def define_imagenet_keras_flags():
  common.define_keras_flags(
      model=True,
      optimizer=True,
      pretrained_filepath=True)
  common.define_pruning_flags()
  common.define_clustering_flags()
  flags_core.set_defaults()
  flags.adopt_module_key_flags(common)


def main(_):
  model_helpers.apply_clean(flags.FLAGS)
  stats = run(flags.FLAGS)
  logging.info('Run stats:\n%s', stats)


if __name__ == '__main__':
  logging.set_verbosity(logging.INFO)
  define_imagenet_keras_flags()
  app.run(main)
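The _cluster_last_three_conv2d_layers helper near the top of this file applies selective weight clustering through tf.keras.models.clone_model with a clone_function. Below is a small, self-contained sketch of the same pattern on a toy model; it assumes the tensorflow-model-optimization package is installed, and the cluster_last_conv helper and toy model are illustrative only, not part of this repository.

# Hedged sketch of selective weight clustering via clone_model, analogous to
# _cluster_last_three_conv2d_layers above.
import tensorflow as tf
import tensorflow_model_optimization as tfmot

def cluster_last_conv(model, number_of_clusters=32):
  # Pick only the final Conv2D layer for clustering.
  last_conv = [l for l in model.layers
               if isinstance(l, tf.keras.layers.Conv2D)][-1]

  def clone_fn(layer):
    if layer is last_conv:
      return tfmot.clustering.keras.cluster_weights(
          layer,
          number_of_clusters=number_of_clusters,
          cluster_centroids_init=(
              tfmot.clustering.keras.CentroidInitialization.LINEAR))
    return layer  # All other layers are cloned unchanged.

  return tf.keras.models.clone_model(model, clone_function=clone_fn)

toy = tf.keras.Sequential([
    tf.keras.layers.Conv2D(8, 3, input_shape=(32, 32, 3)),
    tf.keras.layers.Conv2D(16, 3),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10),
])
clustered = cluster_last_conv(toy)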
models-2.13.1/official/benchmark/models/resnet_imagenet_test.py
0 → 100644
View file @
472e2f80
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with ImageNet data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import parameterized
import tensorflow as tf

from tensorflow.python.eager import context
from official.benchmark.models import resnet_imagenet_main
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.utils.testing import integration
@parameterized.parameters(
    "resnet",
    # "resnet_polynomial_decay", b/151854314
    "mobilenet",
    # "mobilenet_polynomial_decay", b/151854314
    "mobilenet_selective_clustering",
)
class KerasImagenetTest(tf.test.TestCase):
  """Unit tests for Keras Models with ImageNet."""

  _default_flags_dict = [
      "-batch_size", "4",
      "-train_steps", "1",
      "-use_synthetic_data", "true",
      "-data_format", "channels_last",
  ]
  _extra_flags_dict = {
      "resnet": [
          "-model", "resnet50_v1.5",
          "-optimizer", "resnet50_default",
      ],
      "resnet_polynomial_decay": [
          "-model", "resnet50_v1.5",
          "-optimizer", "resnet50_default",
          "-pruning_method", "polynomial_decay",
      ],
      "mobilenet": [
          "-model", "mobilenet",
          "-optimizer", "mobilenet_default",
      ],
      "mobilenet_polynomial_decay": [
          "-model", "mobilenet",
          "-optimizer", "mobilenet_default",
          "-pruning_method", "polynomial_decay",
      ],
      "mobilenet_selective_clustering": [
          "-model", "mobilenet_pretrained",
          "-optimizer", "mobilenet_fine_tune",
          "-clustering_method", "selective_clustering",
      ],
  }
  _tempdir = None

  @classmethod
  def setUpClass(cls):  # pylint: disable=invalid-name
    super(KerasImagenetTest, cls).setUpClass()
    resnet_imagenet_main.define_imagenet_keras_flags()

  def setUp(self):
    super(KerasImagenetTest, self).setUp()
    imagenet_preprocessing.NUM_IMAGES["validation"] = 4
    self.policy = tf.keras.mixed_precision.global_policy()

  def tearDown(self):
    super(KerasImagenetTest, self).tearDown()
    tf.io.gfile.rmtree(self.get_temp_dir())
    tf.keras.mixed_precision.set_global_policy(self.policy)

  def get_extra_flags_dict(self, flags_key):
    return self._extra_flags_dict[flags_key] + self._default_flags_dict
  def test_end_to_end_no_dist_strat(self, flags_key):
    """Test Keras model with 1 GPU, no distribution strategy."""
    extra_flags = [
        "-distribution_strategy", "off",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_graph_no_dist_strat(self, flags_key):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    extra_flags = [
        "-enable_eager", "false",
        "-distribution_strategy", "off",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_1_gpu(self, flags_key):
    """Test Keras model with 1 GPU."""
    if context.num_gpus() < 1:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(1, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "1",
        "-distribution_strategy", "mirrored",
        "-enable_checkpoint_and_export", "1",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_1_gpu_fp16(self, flags_key):
    """Test Keras model with 1 GPU and fp16."""
    if context.num_gpus() < 1:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(1, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "1",
        "-dtype", "fp16",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    if "polynomial_decay" in extra_flags:
      self.skipTest("Pruning with fp16 is currently not supported.")
    if "selective_clustering" in extra_flags:
      self.skipTest("Clustering with fp16 is currently not supported.")

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_2_gpu(self, flags_key):
    """Test Keras model with 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_xla_2_gpu(self, flags_key):
    """Test Keras model with XLA and 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-enable_xla", "true",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_2_gpu_fp16(self, flags_key):
    """Test Keras model with 2 GPUs and fp16."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-dtype", "fp16",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    if "polynomial_decay" in extra_flags:
      self.skipTest("Pruning with fp16 is currently not supported.")
    if "selective_clustering" in extra_flags:
      self.skipTest("Clustering with fp16 is currently not supported.")

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  def test_end_to_end_xla_2_gpu_fp16(self, flags_key):
    """Test Keras model with XLA, 2 GPUs and fp16."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-dtype", "fp16",
        "-enable_xla", "true",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    if "polynomial_decay" in extra_flags:
      self.skipTest("Pruning with fp16 is currently not supported.")
    if "selective_clustering" in extra_flags:
      self.skipTest("Clustering with fp16 is currently not supported.")

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)


if __name__ == "__main__":
  tf.test.main()
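The class-level @parameterized.parameters decorator above injects a flags_key argument into every test method, so each configuration ("resnet", "mobilenet", ...) runs the full test suite. A minimal sketch of that pattern, independent of the ResNet code and using an illustrative ConfigNameTest class, is:

# Hedged sketch of class-level parameterization with absl testing, mirroring
# the structure of KerasImagenetTest above.
from absl.testing import absltest
from absl.testing import parameterized

@parameterized.parameters("resnet", "mobilenet")
class ConfigNameTest(parameterized.TestCase):

  def test_key_is_known(self, flags_key):
    # Each test method receives one parameter value per generated test case.
    self.assertIn(flags_key, ("resnet", "mobilenet"))

if __name__ == "__main__":
  absltest.main()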
models-2.13.1/official/benchmark/models/resnet_imagenet_test_tpu.py
0 → 100644
View file @
472e2f80
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with ImageNet data on TPU."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import parameterized
import tensorflow as tf

from official.benchmark.models import resnet_imagenet_main
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.utils.testing import integration
class KerasImagenetTest(tf.test.TestCase, parameterized.TestCase):
  """Unit tests for Keras Models with ImageNet."""

  _extra_flags_dict = {
      "resnet": [
          "-batch_size", "4",
          "-train_steps", "1",
          "-use_synthetic_data", "true",
          "-model", "resnet50_v1.5",
          "-optimizer", "resnet50_default",
      ],
      "resnet_polynomial_decay": [
          "-batch_size", "4",
          "-train_steps", "1",
          "-use_synthetic_data", "true",
          "-model", "resnet50_v1.5",
          "-optimizer", "resnet50_default",
          "-pruning_method", "polynomial_decay",
      ],
  }
  _tempdir = None

  @classmethod
  def setUpClass(cls):  # pylint: disable=invalid-name
    super(KerasImagenetTest, cls).setUpClass()
    resnet_imagenet_main.define_imagenet_keras_flags()

  def setUp(self):
    super(KerasImagenetTest, self).setUp()
    imagenet_preprocessing.NUM_IMAGES["validation"] = 4
    self.policy = tf.keras.mixed_precision.global_policy()

  def tearDown(self):
    super(KerasImagenetTest, self).tearDown()
    tf.io.gfile.rmtree(self.get_temp_dir())
    tf.keras.mixed_precision.set_global_policy(self.policy)

  @parameterized.parameters([
      "resnet",
      # "resnet_polynomial_decay" b/151854314
  ])
  def test_end_to_end_tpu(self, flags_key):
    """Test Keras model with TPU distribution strategy."""
    extra_flags = [
        "-distribution_strategy", "tpu",
        "-data_format", "channels_last",
        "-enable_checkpoint_and_export", "1",
    ]
    extra_flags = extra_flags + self._extra_flags_dict[flags_key]

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)

  @parameterized.parameters(["resnet"])
  def test_end_to_end_tpu_bf16(self, flags_key):
    """Test Keras model with TPU and bfloat16 activation."""
    extra_flags = [
        "-distribution_strategy", "tpu",
        "-data_format", "channels_last",
        "-dtype", "bf16",
    ]
    extra_flags = extra_flags + self._extra_flags_dict[flags_key]

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)


if __name__ == "__main__":
  tf.test.main()
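These TPU tests select the strategy purely through the -distribution_strategy tpu flag, which the trainer resolves via distribute_utils.get_distribution_strategy. For orientation, a minimal sketch of the equivalent plain-TensorFlow TPU setup is shown below; tpu_address is a placeholder, and the toy model is illustrative only.

# Hedged sketch of standard TensorFlow TPU strategy setup; an empty tpu
# address selects a locally attached TPU, otherwise use a grpc:// worker URL.
import tensorflow as tf

tpu_address = ""  # placeholder, e.g. "grpc://10.0.0.2:8470"
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address)
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)

with strategy.scope():
  model = tf.keras.Sequential([tf.keras.layers.Dense(10)])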