Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
dcuai
dlexamples
Commits
f0d87682
Commit
f0d87682
authored
Aug 01, 2022
by
qianyj
Browse files
update TF code
parent
eaff6662
Changes
129
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
5568 additions
and
0 deletions
+5568
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_test.py
...ks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_test.py
+1493
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util.py
...n/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util.py
+253
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util_test.py
...chmarks-master/scripts/tf_cnn_benchmarks/cnn_util_test.py
+129
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/coco_metric.py
...enchmarks-master/scripts/tf_cnn_benchmarks/coco_metric.py
+198
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/constants.py
.../benchmarks-master/scripts/tf_cnn_benchmarks/constants.py
+67
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/convnet_builder.py
...marks-master/scripts/tf_cnn_benchmarks/convnet_builder.py
+498
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/datasets.py
...n/benchmarks-master/scripts/tf_cnn_benchmarks/datasets.py
+251
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/flags.py
...tion/benchmarks-master/scripts/tf_cnn_benchmarks/flags.py
+93
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/leading_indicators_test.py
...ster/scripts/tf_cnn_benchmarks/leading_indicators_test.py
+1003
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf.py
...ion/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf.py
+260
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf_test.py
...enchmarks-master/scripts/tf_cnn_benchmarks/mlperf_test.py
+189
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/__init__.py
...marks-master/scripts/tf_cnn_benchmarks/models/__init__.py
+0
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/alexnet_model.py
...-master/scripts/tf_cnn_benchmarks/models/alexnet_model.py
+93
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/densenet_model.py
...master/scripts/tf_cnn_benchmarks/models/densenet_model.py
+100
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/__init__.py
...scripts/tf_cnn_benchmarks/models/experimental/__init__.py
+0
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
...ripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
+449
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/official_ncf_model.py
..._cnn_benchmarks/models/experimental/official_ncf_model.py
+172
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/googlenet_model.py
...aster/scripts/tf_cnn_benchmarks/models/googlenet_model.py
+63
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/inception_model.py
...aster/scripts/tf_cnn_benchmarks/models/inception_model.py
+213
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/lenet_model.py
...ks-master/scripts/tf_cnn_benchmarks/models/lenet_model.py
+44
-0
No files found.
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_test.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for benchmark_cnn."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
glob
import
os
import
re
import
unittest
import
mock
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
from
google.protobuf
import
text_format
from
tensorflow.core.framework
import
step_stats_pb2
from
tensorflow.core.profiler
import
tfprof_log_pb2
from
tensorflow.python.platform
import
test
import
benchmark_cnn
import
datasets
import
flags
import
preprocessing
import
test_util
import
variable_mgr_util
from
platforms
import
util
as
platforms_util
def _check_has_gpu():
  """Raises ValueError if no CUDA-capable GPU is available.

  Called at the top of tests that build or run models, so they fail fast
  with an actionable message instead of a confusing placement error.

  Raises:
    ValueError: if `test.is_gpu_available(cuda_only=True)` is False.
  """
  if not test.is_gpu_available(cuda_only=True):
    raise ValueError(
        """You have asked to run part or all of this on GPU, but it appears
that no GPU is available. If your machine has GPUs it is possible you
do not have a version of TensorFlow with GPU support. To build with GPU
support, add --config=cuda to the build flags.\n""")
class TfCnnBenchmarksModelTest(tf.test.TestCase):
  """Tests which are run with multiple models.

  Subclasses override get_model_name() (and optionally the three
  capability predicates below) to select which model the shared tests
  exercise. The base class itself returns None from get_model_name(),
  which makes every test in this class a no-op when run directly.
  """

  def setUp(self):
    """Resets global benchmark_cnn state before each test."""
    super(TfCnnBenchmarksModelTest, self).setUp()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  def get_model_name(self):
    """Returns the model name to test, or None to skip all tests."""
    return None

  # Return true to run tests that don't need to be run on every model.
  # This should be done for one or two cheap models.
  def extended_tests(self):
    return False

  # Return false to suppress actually running the model; this is useful
  # for tests that are large.
  def model_execution_test(self):
    return False

  # Return false to suppress actually saving and loading the model.
  def model_save_load_test(self):
    return False

  def testSaveLoadModel(self):
    """Trains, checkpoints, restores, and checks restored state."""
    _check_has_gpu()
    if not self.get_model_name() or not self.model_save_load_test():
      return

    params = benchmark_cnn.make_params(
        model=self.get_model_name(),
        num_batches=1,
        num_intra_threads=0,
        num_inter_threads=0,
        distortions=False,
        batch_size=2,
        variable_update='replicated',
        num_warmup_batches=0,
        num_gpus=2,
        train_dir=test_util.get_temp_dir('testSaveLoadModel_' +
                                         self.get_model_name()))

    # Run one batch and save the model.
    # Note that this uses a non-test session.
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.run()
    self.assertEqual(bench.init_global_step, 0)
    # Clear the default graph.
    tf.reset_default_graph()
    # Test if checkpoint had been saved.
    ckpt = tf.train.get_checkpoint_state(params.train_dir)
    # NOTE(review): the '.' characters in this pattern are unescaped regex
    # dots (match any character), not literal dots — presumably harmless
    # here, but confirm before reusing the pattern elsewhere.
    match = re.match(os.path.join(params.train_dir,
                                  r'model.ckpt-(\d+).index'),
                     ckpt.model_checkpoint_path + '.index')
    self.assertTrue(match)
    self.assertGreaterEqual(int(match.group(1)), params.num_batches)

    params = params._replace(num_batches=2)
    # Reload the model
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.run()
    # Check if global step has been restored.
    self.assertNotEqual(bench.init_global_step, 0)
    ckpt = tf.train.get_checkpoint_state(params.train_dir)
    match = re.match(os.path.join(params.train_dir,
                                  r'model.ckpt-(\d+).index'),
                     ckpt.model_checkpoint_path + '.index')
    self.assertTrue(match)
    self.assertGreaterEqual(int(match.group(1)), params.num_batches)
    # Check that the batch norm moving averages are restored from checkpoints
    with tf.Graph().as_default():
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench._build_model()
      saver = tf.train.Saver(bench.variable_mgr.savable_variables())
      with tf.Session(config=benchmark_cnn.create_config_proto(params)) as sess:
        benchmark_cnn.load_checkpoint(saver, sess, params.train_dir)
        sess.run(bench.variable_mgr.get_post_init_ops())

        bn_moving_vars = [
            v for v in tf.global_variables()
            if '/batchnorm' in v.name and '/moving' in v.name
        ]
        self.assertGreater(len(bn_moving_vars), 0)
        for moving_var in bn_moving_vars:
          moving_var_value = sess.run(moving_var)
          # Check that the moving means and moving variances have been restored
          # by asserting they are not their default values of 0 and 1,
          # respectively
          if '/moving_mean' in moving_var.name:
            self.assertFalse(
                np.array_equal(moving_var_value,
                               np.zeros(moving_var_value.shape,
                                        moving_var_value.dtype)))
          else:
            self.assertIn('/moving_variance', moving_var.name)
            self.assertFalse(
                np.array_equal(moving_var_value,
                               np.ones(moving_var_value.shape,
                                       moving_var_value.dtype)))

  def testModel(self):
    """Runs one training batch of the model end to end."""
    _check_has_gpu()
    if not self.get_model_name() or not self.model_execution_test():
      return

    params = benchmark_cnn.make_params(
        model=self.get_model_name(),
        num_batches=1,
        num_intra_threads=1,
        num_inter_threads=12,
        batch_size=2,
        distortions=False)

    # Run this one; note that this uses a non-test session.
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.run()

  def testSendRecvVariables(self):
    """Builds the model with parameter_server variable placement."""
    self._testVariables('parameter_server')
    if self.extended_tests():
      self._testVariables('parameter_server', local_parameter_device='CPU')
      self._testVariables('parameter_server', optimizer='sgd')

  def testReplicatedVariables(self):
    """Builds the model with replicated variable placement."""
    self._testVariables('replicated')
    if self.extended_tests():
      self._testVariables('replicated', all_reduce_spec=None)
      self._testVariables('replicated', use_fp16=True, fp16_vars=False)
      self._testVariables(
          'replicated',
          all_reduce_spec=None,
          use_fp16=True,
          fp16_vars=False,
          fp16_enable_auto_loss_scale=True,
          fp16_inc_loss_scale_every_n=4)

  def testIndependentVariables(self):
    """Builds the model with independent variable placement."""
    self._testVariables('independent')
    self._testVariables(
        'independent',
        all_reduce_spec=None,
        use_fp16=True,
        fp16_vars=False,
        fp16_enable_auto_loss_scale=True,
        fp16_inc_loss_scale_every_n=4)

  def testSummaryVerbosity(self):
    """Checks summary-op creation at each verbosity level."""
    self._testVariables('parameter_server', summary_verbosity=1)
    if self.extended_tests():
      self._testVariables('parameter_server', summary_verbosity=2)
      self._testVariables('parameter_server', summary_verbosity=3)

  def testStagedVariables(self):
    """Builds the model with staged variables."""
    self._testVariables('parameter_server', staged_vars=True)
    if self.extended_tests():
      self._testVariables('parameter_server', staged_vars=True,
                          local_parameter_device='CPU')
      self._testVariables('parameter_server', staged_vars=True,
                          use_fp16=True, fp16_vars=True)

  def _assert_correct_var_type(self, var, params):
    """Asserts `var` has the dtype implied by the fp16 params.

    gpu_cached_inputs variables are exempt; batchnorm variables stay
    float32 even when fp16_vars is set.
    """
    if 'gpu_cached_inputs' not in var.name:
      if params.use_fp16 and params.fp16_vars and 'batchnorm' not in var.name:
        expected_type = tf.float16
      else:
        expected_type = tf.float32
      self.assertEqual(var.dtype.base_dtype, expected_type)

  def _testVariables(self,
                     variable_update,
                     summary_verbosity=0,
                     local_parameter_device='GPU',
                     staged_vars=False,
                     optimizer='momentum',
                     # TODO(b/80125832): Enable nccl in tests
                     # all_reduce_spec='nccl',
                     all_reduce_spec='',
                     use_fp16=False,
                     fp16_vars=False,
                     fp16_enable_auto_loss_scale=False,
                     fp16_inc_loss_scale_every_n=10):
    """Builds (but does not run) a 2-GPU model and validates variables.

    Checks each global/local variable's device placement and dtype
    against the chosen variable_update mode, and that the number/kind
    of summary ops matches summary_verbosity.
    """
    if not self.get_model_name():
      return
    _check_has_gpu()

    params = benchmark_cnn.make_params(
        model=self.get_model_name(),
        num_batches=1,
        num_intra_threads=1,
        num_inter_threads=12,
        distortions=False,
        variable_update=variable_update,
        local_parameter_device=local_parameter_device,
        num_gpus=2,
        summary_verbosity=summary_verbosity,
        staged_vars=staged_vars,
        optimizer=optimizer,
        all_reduce_spec=all_reduce_spec,
        compact_gradient_transfer=False if all_reduce_spec == 'nccl' else True,
        use_fp16=use_fp16,
        fp16_loss_scale=2.,
        fp16_vars=fp16_vars,
        fp16_enable_auto_loss_scale=fp16_enable_auto_loss_scale,
        fp16_inc_loss_scale_every_n=fp16_inc_loss_scale_every_n,
    )

    # Test building models using multiple GPUs, but don't
    # run them.
    with self.test_session(graph=tf.Graph()):
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench._build_model()

      # Rough validation of variable type and placement, depending on mode.
      all_vars = tf.global_variables() + tf.local_variables()
      if params.variable_update == 'parameter_server':
        for v in all_vars:
          tf.logging.debug('var: %s' % v.name)
          match = re.match(r'tower_(\d+)/v/gpu_cached_inputs:0', v.name)
          if match:
            self.assertEqual(v.device, '/device:GPU:%s' % match.group(1))
          elif v.name.startswith('v/'):
            self.assertEqual(v.device, '/device:%s:0' % local_parameter_device)
            self._assert_correct_var_type(v, params)
          elif v.name in ('input_processing/images:0',
                          'input_processing/labels:0', 'init_learning_rate:0',
                          'global_step:0', 'loss_scale:0',
                          'loss_scale_normal_steps:0'):
            self.assertEqual(v.device, '/device:CPU:0')
          else:
            raise ValueError('Unexpected variable %s' % v.name)
      else:
        # replicated/independent modes: each tower owns a full copy, so the
        # per-GPU variable counts must balance.
        v0_count = 0
        v1_count = 0
        for v in all_vars:
          if v.name.startswith('tower_0/v0/'):
            self.assertEqual(v.name, 'tower_0/v0/gpu_cached_inputs:0')
            self.assertEqual(v.device, '/device:GPU:0')
          elif v.name.startswith('tower_1/v1/'):
            self.assertEqual(v.name, 'tower_1/v1/gpu_cached_inputs:0')
            self.assertEqual(v.device, '/device:GPU:1')
          elif v.name.startswith('v0/'):
            v0_count += 1
            self.assertEqual(v.device, '/device:GPU:0')
            self._assert_correct_var_type(v, params)
          elif v.name.startswith('v1/'):
            v1_count += 1
            self.assertEqual(v.device, '/device:GPU:1')
            self._assert_correct_var_type(v, params)
          elif v.name in ('input_processing/images:0',
                          'input_processing/labels:0', 'init_learning_rate:0',
                          'global_step:0', 'loss_scale:0',
                          'loss_scale_normal_steps:0'):
            self.assertEqual(v.device, '/device:CPU:0')
          else:
            raise ValueError('Unexpected variable %s' % v.name)
        self.assertEqual(v0_count, v1_count)

      # Validate summary ops in the model depending on verbosity level
      summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
      num_summary_ops = len(summary_ops)
      self.assertEqual(num_summary_ops > 0, summary_verbosity > 0)
      if summary_verbosity > 0:
        has_affine_histogram = False
        has_gradient_histogram = False
        has_log_gradients_histogram = False
        for op in summary_ops:
          if '/gradients' in op.name:
            has_gradient_histogram = True
          elif '/affine' in op.name:
            has_affine_histogram = True
          elif 'log_gradients' in op.name:
            has_log_gradients_histogram = True
        self.assertEqual(summary_verbosity >= 3, has_affine_histogram)
        self.assertEqual(summary_verbosity >= 3, has_gradient_histogram)
        self.assertEqual(summary_verbosity >= 2, has_log_gradients_histogram)
        if summary_verbosity == 1:
          self.assertLess(num_summary_ops, 10)
# Per-model instantiations of TfCnnBenchmarksModelTest. Each subclass only
# selects the model name; a few also opt in to the extended / save-load
# test variants.
#
# NOTE(review): TestVgg1Model returns 'vgg11' — the class name looks like a
# typo for TestVgg11Model; confirm before renaming (test runners select by
# class name). Also, 'trivial' is registered twice (TrivialModelTest and
# TestTrivialModel) — presumably one is redundant; verify with the owners.


class TrivialModelTest(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'trivial' model."""

  def get_model_name(self):
    return 'trivial'


class TestVgg1Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'vgg11' model."""

  def get_model_name(self):
    return 'vgg11'


class TestVgg19Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'vgg19' model."""

  def get_model_name(self):
    return 'vgg19'


class TestLenet5Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'lenet' model."""

  def get_model_name(self):
    return 'lenet'


class TestGooglenetModel(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'googlenet' model."""

  def get_model_name(self):
    return 'googlenet'


class TestOverfeatModel(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'overfeat' model."""

  def get_model_name(self):
    return 'overfeat'


class TestAlexnetModel(TfCnnBenchmarksModelTest):
  """Runs the shared model tests (including extended) against 'alexnet'."""

  def get_model_name(self):
    return 'alexnet'

  def extended_tests(self):
    return True


class TestTrivialModel(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'trivial' model."""

  def get_model_name(self):
    return 'trivial'


class TestInceptionv3Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests (including extended) against 'inception3'."""

  def get_model_name(self):
    return 'inception3'

  def extended_tests(self):
    return True


class TestInceptionv4Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'inception4' model."""

  def get_model_name(self):
    return 'inception4'


class TestResnet50Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests (including save/load) against 'resnet50'."""

  def get_model_name(self):
    return 'resnet50'

  def model_save_load_test(self):
    return True


class TestResnet101Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet101' model."""

  def get_model_name(self):
    return 'resnet101'


class TestResnet152Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet152' model."""

  def get_model_name(self):
    return 'resnet152'


class TestResnet50V2Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet50_v2' model."""

  def get_model_name(self):
    return 'resnet50_v2'


class TestResnet101V2Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet101_v2' model."""

  def get_model_name(self):
    return 'resnet101_v2'


class TestResnet152V2Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet152_v2' model."""

  def get_model_name(self):
    return 'resnet152_v2'
class
TfCnnBenchmarksTest
(
tf
.
test
.
TestCase
):
"""Tests that benchmark_cnn runs correctly."""
  def setUp(self):
    """Requires a GPU and resets global benchmark_cnn state per test."""
    super(TfCnnBenchmarksTest, self).setUp()
    _check_has_gpu()
    benchmark_cnn.setup(benchmark_cnn.make_params())
def
_run_benchmark_cnn
(
self
,
params
):
logs
=
[]
benchmark_cnn
.
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)
benchmark_cnn
.
BenchmarkCNN
(
params
).
run
()
return
logs
  def _run_benchmark_cnn_with_fake_images(self, params, images, labels):
    """Runs BenchmarkCNN feeding it `images`/`labels` and returns log lines.

    Swaps the benchmark's input preprocessor for a TestImagePreprocessor
    primed with the given data. The preprocessor is built for 227x227x3
    images split across params.num_gpus.

    Args:
      params: Params for BenchmarkCNN.
      images: the image data to feed (assumed compatible with shape
        [batch, 227, 227, 3] — see the shapes passed below).
      labels: the label data to feed.

    Returns:
      A list of lines from the output of BenchmarkCNN.
    """
    logs = []
    benchmark_cnn.log_fn = test_util.print_and_add_to_list(logs)
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.input_preprocessor = preprocessing.TestImagePreprocessor(
        params.batch_size * params.num_gpus,
        [[params.batch_size, 227, 227, 3], [params.batch_size]],
        params.num_gpus,
        bench.model.data_type)
    # Force queue-runner-based input so the fake data is actually consumed.
    bench.dataset._queue_runner_required = True
    bench.input_preprocessor.set_fake_data(images, labels)
    bench.input_preprocessor.expected_subset = (
        'validation' if params.eval else 'train')
    bench.run()
    return logs
  def _run_benchmark_cnn_with_black_and_white_images(self, params):
    """Runs BenchmarkCNN with black and white images.

    A BenchmarkCNN is created and run with black and white images as input.
    Half the images are black (i.e., filled with 0s) and half are white
    (i.e., filled with 255s).

    Args:
      params: Params for BenchmarkCNN.

    Returns:
      A list of lines from the output of BenchmarkCNN.
    """
    # TODO(reedwm): Instead of generating images here, use black and white
    # tfrecords by calling test_util.create_black_and_white_images().
    effective_batch_size = params.batch_size * params.num_gpus
    half_batch_size = effective_batch_size // 2
    images = np.zeros((effective_batch_size, 227, 227, 3), dtype=np.float32)
    # First half stays 0 (black); second half becomes 255 (white).
    images[half_batch_size:, :, :, :] = 255
    labels = np.array([0] * half_batch_size + [1] * half_batch_size,
                      dtype=np.int32)
    return self._run_benchmark_cnn_with_fake_images(params, images, labels)
  def _train_and_eval_local(self,
                            params,
                            check_output_values=False,
                            max_final_loss=10.,
                            skip=None,
                            use_test_preprocessor=True):
    """Trains and evaluates locally via test_util.train_and_eval.

    Args:
      params: Params for BenchmarkCNN.
      check_output_values: whether train_and_eval should compare output
        values exactly (see TODO below for why this defaults to False).
      max_final_loss: upper bound on the final loss.
      skip: passed through to train_and_eval to skip phases
        (e.g. 'eval', 'eval_and_train_from_checkpoint').
      use_test_preprocessor: if True, feed synthetic black-and-white
        images; otherwise run with the params' own input pipeline.
    """
    # TODO(reedwm): check_output_values should default to True and be enabled
    # on every test. Currently, if check_output_values=True and the calls to
    # tf.set_random_seed(...) and np.seed(...) are passed certain seed values
    # in benchmark_cnn.py, then most tests will fail. This indicates the tests
    # are brittle and could fail with small changes when
    # check_output_values=True, so check_output_values defaults to False for
    # now.

    def run_fn(run_type, inner_params):
      # run_type is unused: the same runner serves train and eval phases.
      del run_type
      if use_test_preprocessor:
        return [
            self._run_benchmark_cnn_with_black_and_white_images(inner_params)
        ]
      else:
        return [self._run_benchmark_cnn(inner_params)]

    return test_util.train_and_eval(self, run_fn, params,
                                    check_output_values=check_output_values,
                                    max_final_loss=max_final_loss,
                                    skip=skip)
  def testAlexnet(self):
    """Trains/evals alexnet for 30 batches at learning rate 0.01."""
    params = test_util.get_params('testAlexnet')._replace(
        num_batches=30, init_learning_rate=0.01, model='alexnet')
    self._train_and_eval_local(params)

  def testNoPrintAccuracy(self):
    """Trains/evals with training-accuracy printing disabled."""
    params = test_util.get_params('testNoPrintAccuracy')._replace(
        print_training_accuracy=False)
    self._train_and_eval_local(params)
  def testLowAccuracy(self):
    """Checks reported top-1/top-5 accuracy on a degenerate batch."""
    params = test_util.get_params('testLowAccuracy')._replace(
        print_training_accuracy=True, batch_size=5, num_batches=10)
    # We force low accuracy by having each batch containing 10 identical
    # images, each with a different label. This guarantees a top-1 accuracy
    # of exactly 0.1 and a top-5 accuracy of exactly 0.5.
    images = np.zeros((10, 227, 227, 3), dtype=np.float32)
    labels = np.arange(10, dtype=np.int32)
    logs = self._run_benchmark_cnn_with_fake_images(params, images, labels)
    training_outputs = test_util.get_training_outputs_from_logs(
        logs, params.print_training_accuracy)
    last_output = training_outputs[-1]
    # TODO(reedwm): These should be assertEqual but for some reason,
    # occasionally the accuracies are lower (Running this test 500 times,
    # these asserts failed twice). Investigate this problem.
    self.assertLessEqual(last_output.top_1_accuracy, 0.1)
    self.assertLessEqual(last_output.top_5_accuracy, 0.5)
def
testParameterServer
(
self
):
params
=
test_util
.
get_params
(
'testParameterServer'
)
self
.
_train_and_eval_local
(
params
)
def
testParameterServerStaged
(
self
):
params
=
test_util
.
get_params
(
'testParameterServerStaged'
).
_replace
(
staged_vars
=
True
)
self
.
_train_and_eval_local
(
params
)
def
testReplicated
(
self
):
params
=
test_util
.
get_params
(
'testReplicated'
).
_replace
(
variable_update
=
'replicated'
)
self
.
_train_and_eval_local
(
params
)
def
testIndependent
(
self
):
params
=
test_util
.
get_params
(
'testIndependent'
).
_replace
(
variable_update
=
'independent'
)
self
.
_train_and_eval_local
(
params
)
  def testForwardOnly(self):
    """Runs forward passes only (no training)."""
    params = test_util.get_params('testForwardOnly')._replace(
        forward_only=True)
    # Evaluation is not supported with --forward_only, so we set skip='eval'.
    self._train_and_eval_local(params, skip='eval')

  def testForwardOnlyAndFreeze(self):
    """Runs forward-only with graph freezing and no train_dir."""
    params = test_util.get_params('testForwardOnlyAndFreeze')._replace(
        forward_only=True, freeze_when_forward_only=True, train_dir=None)
    # Training is not supported with --freeze_when_forward_only.
    self._train_and_eval_local(params, skip='eval_and_train_from_checkpoint')
  def testNoDistortions(self):
    """Trains/evals with image distortions disabled."""
    params = test_util.get_params('testNoDistortions')._replace(
        distortions=False)
    self._train_and_eval_local(params)

  def testCpuAsLocalParamDevice(self):
    """Trains/evals with the CPU as the local parameter device."""
    params = test_util.get_params('testCpuAsLocalParamDevice')._replace(
        local_parameter_device='cpu')
    self._train_and_eval_local(params)

  def testNHWC(self):
    """Trains/evals using NHWC data format."""
    params = test_util.get_params('testNHWC')._replace(data_format='NHWC')
    self._train_and_eval_local(params)

  def testCpuAsDevice(self):
    """Trains/evals entirely on the CPU."""
    params = test_util.get_params('testCpuAsDevice')._replace(
        device='cpu',
        data_format='NHWC')  # NHWC required when --device=cpu
    self._train_and_eval_local(params)
  def testMomentumParameterServer(self):
    """Trains/evals with the momentum optimizer (momentum=0.8)."""
    params = test_util.get_params('testMomentumParameterServer')._replace(
        optimizer='momentum', momentum=0.8)
    self._train_and_eval_local(params)

  def testRmspropReplicated(self):
    """Trains/evals replicated with the RMSProp optimizer."""
    params = test_util.get_params('testRmspropReplicated')._replace(
        variable_update='replicated',
        optimizer='rmsprop',
        rmsprop_decay=0.8,
        rmsprop_momentum=0.6,
        rmsprop_epsilon=0.7,
        init_learning_rate=0.01)
    self._train_and_eval_local(params)
  def testBatchGroupSize(self):
    """Trains/evals with input batches grouped 4 at a time."""
    params = test_util.get_params('testBatchGroupSize')._replace(
        batch_group_size=4, num_batches=100, num_warmup_batches=5)
    self._train_and_eval_local(params)

  def testGradientClip(self):
    """Trains/evals with gradient clipping at 100.0."""
    params = test_util.get_params('testGradientClip')._replace(
        gradient_clip=100.0)
    self._train_and_eval_local(params)

  def testWeightDecay(self):
    """Trains/evals with weight decay 0.0001."""
    params = test_util.get_params('testWeightDecay')._replace(
        weight_decay=0.0001)
    self._train_and_eval_local(params)

  def testNoLayers(self):
    """Trains/evals without using the tf.layers API."""
    params = test_util.get_params('testNoLayers')._replace(
        use_tf_layers=False)
    self._train_and_eval_local(params)
  def testSaveModelSteps(self):
    """Checks periodic checkpointing keeps only the last 3 checkpoints."""
    params = test_util.get_params('testSaveModelSteps')._replace(
        save_model_steps=2, num_warmup_batches=0, num_batches=10,
        max_ckpts_to_keep=3)
    self._train_and_eval_local(params)
    for i in range(1, 20 + 1):
      # We train for 20 steps, since self._train_and_eval_local() does two
      # training runs of 10 steps each. We save a checkpoint every 2 steps and
      # keep the last 3 checkpoints, so at the end, we should have checkpoints
      # for steps 16, 18, and 20.
      matches = glob.glob(os.path.join(params.train_dir,
                                       'model.ckpt-{}.*'.format(i)))
      if i in (16, 18, 20):
        self.assertTrue(matches)
      else:
        self.assertFalse(matches)
  def testFp16WithFp32Vars(self):
    """Trains/evals in fp16 math with fp32 variables, loss scale 1."""
    params = test_util.get_params('testFp16WithFp32Vars')._replace(
        use_fp16=True, fp16_vars=False, fp16_loss_scale=1.)
    self._train_and_eval_local(params)

  def testFp16WithFp16Vars(self):
    """Trains/evals in fp16 math with fp16 variables."""
    params = test_util.get_params('testFp16WithFp16Vars')._replace(
        use_fp16=True, fp16_vars=True)
    self._train_and_eval_local(params)
  def testXlaCompile(self):
    """Trains/evals with XLA compilation enabled."""
    params = test_util.get_params('testXlaCompile')._replace(xla_compile=True)
    self._train_and_eval_local(params)

  @unittest.skip('Fails for unknown reason')
  def testXlaCompileWithFp16(self):
    """Trains/evals with XLA compilation and fp16 (currently skipped)."""
    params = test_util.get_params('testXlaCompileWithFp16')._replace(
        use_fp16=True, xla_compile=True)
    self._train_and_eval_local(params)
  def testGradientRepacking(self):
    """Trains/evals with gradient repacking, in fp32 then fp16."""
    params = test_util.get_params('testGradientRepacking1')._replace(
        gradient_repacking=2)
    self._train_and_eval_local(params, skip='eval_and_train_from_checkpoint')
    params = test_util.get_params('testGradientRepacking2')._replace(
        gradient_repacking=2, use_fp16=True)
    self._train_and_eval_local(params, skip='eval_and_train_from_checkpoint')
  def testTraceFileChromeTraceFormat(self):
    """Checks a non-empty Chrome-format trace file is written."""
    trace_file = os.path.join(self.get_temp_dir(),
                              'testTraceFileChromeTraceFormat_tracefile')
    params = test_util.get_params('testTraceFileChromeTraceFormat')._replace(
        trace_file=trace_file, use_chrome_trace_format=True)
    self._train_and_eval_local(params)
    self.assertGreater(os.stat(trace_file).st_size, 0)
  def testTraceFileStepStatsProto(self):
    """Checks the trace file parses as a text-format StepStats proto."""
    trace_file = os.path.join(self.get_temp_dir(),
                              'testTraceFileStepStatsProto_tracefile')
    params = test_util.get_params('testTraceFileStepStatsProto')._replace(
        trace_file=trace_file, use_chrome_trace_format=False)
    self._train_and_eval_local(params)
    self.assertGreater(os.stat(trace_file).st_size, 0)
    with open(trace_file) as f:
      step_stats = step_stats_pb2.StepStats()
      # The following statement should not raise an exception.
      contents = f.read()
      text_format.Merge(contents, step_stats)
  def testTfprofFile(self):
    """Checks the tfprof file parses as a binary ProfileProto."""
    tfprof_file = os.path.join(self.get_temp_dir(),
                               'testTfprofFile_tfproffile')
    params = test_util.get_params('testTfprofFile')._replace(
        tfprof_file=tfprof_file)
    self._train_and_eval_local(params, skip='eval_and_train_from_checkpoint')
    self.assertGreater(os.stat(tfprof_file).st_size, 0)
    with open(tfprof_file, 'rb') as f:
      profile_proto = tfprof_log_pb2.ProfileProto()
      # The following statement should not raise an exception.
      profile_proto.ParseFromString(f.read())
  @unittest.skip('Fails for unknown reason')
  def testMoveTrainDir(self):
    """Evals from a train_dir that was renamed after training (skipped)."""
    params = test_util.get_params('testMoveTrainDir')
    self._train_and_eval_local(params)
    new_train_dir = params.train_dir + '_moved'
    os.rename(params.train_dir, new_train_dir)
    params = params._replace(train_dir=new_train_dir, eval=True)
    self._run_benchmark_cnn_with_black_and_white_images(params)
  @mock.patch('tensorflow.compat.v1.train.Saver')
  @mock.patch('benchmark_cnn._get_checkpoint_to_load')
  def testLoadCheckpoint(self, mock_checkpoint_to_load, mock_saver):
    """Tests load checkpoint with full path to checkpoint."""
    expected_checkpoint = '/path/to/checkpoints/model.ckpt-1243'
    mock_checkpoint_to_load.return_value = expected_checkpoint

    # load_checkpoint is expected to parse the global step (1243) out of the
    # checkpoint filename suffix.
    global_batch = benchmark_cnn.load_checkpoint(mock_saver,
                                                 None,
                                                 expected_checkpoint)
    self.assertEqual(global_batch, 1243)
  def testGetCheckpointToLoadFullPath(self):
    """Tests passing full path."""
    ckpt_path = '/foo/bar/model.ckpt-189'
    # A full checkpoint path should be returned unchanged.
    full_path = benchmark_cnn._get_checkpoint_to_load(ckpt_path)
    self.assertEqual(full_path, ckpt_path)

  def testGetCheckpointToLoadException(self):
    """Tests exception for directory without a checkpoint."""
    ckpt_path = '/foo/bar/checkpoints'
    self.assertRaises(benchmark_cnn.CheckpointNotFoundException,
                      benchmark_cnn._get_checkpoint_to_load, ckpt_path)
@
mock
.
patch
(
'tensorflow.compat.v1.train.get_checkpoint_state'
)
def
testGetCheckpointToLoad
(
self
,
mock_checkpoint_state
):
"""Tests passing path to checkpoint folder."""
expected_checkpoint
=
'/path/to/checkpoints/model.ckpt-1243'
mock_checkpoint_state
.
return_value
=
mock
.
Mock
(
model_checkpoint_path
=
expected_checkpoint
)
ckpt_path
=
'/path/to/checkpoints/'
full_path
=
benchmark_cnn
.
_get_checkpoint_to_load
(
ckpt_path
)
self
.
assertEqual
(
full_path
,
expected_checkpoint
)
def
testImagenetPreprocessor
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessor'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testImagenetPreprocessorNoDistortions
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessorNoDistortions'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
,
distortions
=
False
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testImagenetPreprocessorVerboseSummary
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessorVerboseSummary'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
,
distortions
=
False
,
summary_verbosity
=
2
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testCifar10SyntheticData
(
self
):
params
=
test_util
.
get_params
(
'testCifar10SyntheticData'
).
_replace
(
data_name
=
'cifar10'
)
self
.
_train_and_eval_local
(
params
)
def
testShiftRatio
(
self
):
test_util
.
monkey_patch_base_cluster_manager
()
params
=
benchmark_cnn
.
make_params
(
data_name
=
'imagenet'
,
data_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
),
job_name
=
'worker'
,
worker_hosts
=
'w1,w2,w3,w4'
,
ps_hosts
=
'p1'
,
task_index
=
0
)
self
.
assertEqual
(
benchmark_cnn
.
BenchmarkCNN
(
params
).
input_preprocessor
.
shift_ratio
,
0.0
)
params
=
params
.
_replace
(
task_index
=
3
)
self
.
assertEqual
(
benchmark_cnn
.
BenchmarkCNN
(
params
).
input_preprocessor
.
shift_ratio
,
0.75
)
def
testDistributedReplicatedSavableVars
(
self
):
test_util
.
monkey_patch_base_cluster_manager
()
params
=
benchmark_cnn
.
make_params
(
variable_update
=
'distributed_replicated'
,
model
=
'inception4'
,
data_name
=
'imagenet'
,
data_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
),
job_name
=
'worker'
,
worker_hosts
=
'w1,w2,w3,w4'
,
ps_hosts
=
'p1'
,
datasets_use_prefetch
=
False
)
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
with
tf
.
Graph
().
as_default
():
bench
.
_build_model
()
savable_vars
=
bench
.
variable_mgr
.
savable_variables
()
# Assert all global variables are in savable_vars
for
v
in
tf
.
global_variables
():
if
not
v
.
name
.
startswith
(
variable_mgr_util
.
PS_SHADOW_VAR_PREFIX
+
'/v0'
):
self
.
assertEqual
(
v
.
name
,
'global_step:0'
)
name
=
bench
.
variable_mgr
.
_strip_port
(
v
.
name
)
if
name
.
startswith
(
variable_mgr_util
.
PS_SHADOW_VAR_PREFIX
):
name
=
name
[
len
(
variable_mgr_util
.
PS_SHADOW_VAR_PREFIX
+
'/'
):]
self
.
assertIn
(
name
,
savable_vars
)
self
.
assertIn
(
savable_vars
[
name
],
tf
.
global_variables
())
# Assert all local variables on the first tower are in savable_vars
for
v
in
tf
.
local_variables
():
if
v
.
name
.
startswith
(
'v0/'
):
name
=
bench
.
variable_mgr
.
_strip_port
(
v
.
name
)
self
.
assertIn
(
name
,
savable_vars
)
def
_test_preprocessing_eval
(
self
,
image_height
,
image_width
,
output_height
,
output_width
):
image
=
tf
.
fill
((
image_height
,
image_width
,
3
),
tf
.
constant
(
128
,
dtype
=
tf
.
uint8
))
params
=
benchmark_cnn
.
make_params
()
new_image
=
preprocessing
.
eval_image
(
image
,
output_height
,
output_width
,
0
,
'bilinear'
,
params
.
summary_verbosity
)
with
self
.
test_session
()
as
sess
:
new_image_value
=
sess
.
run
(
new_image
)
self
.
assertAllEqual
(
new_image_value
,
np
.
full
((
output_height
,
output_width
,
3
),
128
,
dtype
=
np
.
uint8
))
def
testPreprocessingEval
(
self
):
self
.
_test_preprocessing_eval
(
10
,
10
,
4
,
4
)
self
.
_test_preprocessing_eval
(
4
,
4
,
10
,
10
)
self
.
_test_preprocessing_eval
(
1
,
100
,
100
,
1
)
self
.
_test_preprocessing_eval
(
100
,
1
,
1
,
100
)
self
.
_test_preprocessing_eval
(
1
,
100
,
1
,
100
)
def
_test_preprocessing_traing
(
self
,
image_buf
,
image_color
,
output_height
,
output_width
,
bbox
,
batch_position
,
resize_method
,
distortions
,
summary_verbosity
,
fuse_decode_and_crop
):
new_image
=
preprocessing
.
train_image
(
image_buf
,
output_height
,
output_width
,
bbox
,
batch_position
,
resize_method
,
distortions
,
summary_verbosity
=
summary_verbosity
,
fuse_decode_and_crop
=
fuse_decode_and_crop
)
self
.
assertEqual
(
new_image
.
shape
,
[
output_height
,
output_width
,
3
])
with
self
.
test_session
(
use_gpu
=
True
)
as
sess
:
new_image_value
=
sess
.
run
(
new_image
)
self
.
assertAllClose
(
new_image_value
,
np
.
full
(
[
output_height
,
output_width
,
3
],
image_color
,
dtype
=
np
.
float32
),
atol
=
50.
,
rtol
=
0.
)
def
testPreprocessingTrain
(
self
):
test_data_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'images'
)
black_file
=
os
.
path
.
join
(
test_data_dir
,
'black_image.jpg'
)
with
open
(
black_file
,
'rb'
)
as
f
:
black_jpg_buffer
=
f
.
read
()
white_file
=
os
.
path
.
join
(
test_data_dir
,
'white_image.jpg'
)
with
open
(
white_file
,
'rb'
)
as
f
:
white_jpg_buffer
=
f
.
read
()
bbox
=
tf
.
zeros
((
1
,
0
,
4
),
dtype
=
tf
.
float32
)
batch_position
=
0
# Each size config is (output_height, output_width, resize_method)
size_configs
=
[(
100
,
100
,
'round_robin'
),
(
150
,
10
,
'bilinear'
),
(
10
,
150
,
'nearest'
)]
# Each image config is (image_buf, image_color)
image_configs
=
[(
white_jpg_buffer
,
255
),
(
black_jpg_buffer
,
0
)]
for
(
image_buf
,
image_color
)
in
image_configs
:
for
output_height
,
output_width
,
resize_method
in
size_configs
:
for
distortions
in
[
True
,
False
]:
for
summary_verbosity
in
[
0
,
2
]:
for
fuse_decode_and_crop
in
[
True
,
False
]:
self
.
_test_preprocessing_traing
(
image_buf
,
image_color
,
output_height
,
output_width
,
bbox
,
batch_position
,
resize_method
,
distortions
,
summary_verbosity
,
fuse_decode_and_crop
)
def
_test_learning_rate
(
self
,
params
,
global_step_to_expected_learning_rate
):
self
.
longMessage
=
True
# pylint: disable=invalid-name
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
with
tf
.
Graph
().
as_default
()
as
graph
:
bench
.
_build_model
()
global_step
=
graph
.
get_tensor_by_name
(
'global_step:0'
)
learning_rate
=
graph
.
get_tensor_by_name
(
'learning_rate_tensor:0'
)
with
self
.
test_session
(
graph
=
graph
,
use_gpu
=
True
)
as
sess
:
items
=
global_step_to_expected_learning_rate
.
items
()
for
global_step_val
,
expected_learning_rate
in
items
:
self
.
assertAlmostEqual
(
sess
.
run
(
learning_rate
,
{
global_step
:
global_step_val
}),
expected_learning_rate
,
msg
=
'at global_step:{}'
.
format
(
global_step_val
))
def
testLearningRateModelSpecificResNet
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
batch_size
=
256
,
variable_update
=
'parameter_server'
,
num_gpus
=
1
)
self
.
_test_learning_rate
(
params
,
{
0
:
0
,
150136
:
0.128
,
150137
:
0.0128
,
300273
:
0.0128
,
300274
:
0.00128
,
10000000
:
0.0000128
})
def
testLearningRateUserProvidedInitLr
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
batch_size
=
256
,
variable_update
=
'replicated'
,
init_learning_rate
=
1.
)
self
.
_test_learning_rate
(
params
,
{
0
:
1.
,
10000000
:
1.
})
def
testLearningRateUserProvidedInitLrAndWarmup
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
batch_size
=
256
,
variable_update
=
'replicated'
,
init_learning_rate
=
1.
,
num_learning_rate_warmup_epochs
=
5
)
self
.
_test_learning_rate
(
params
,
{
0
:
0.
,
12511
:
0.5
,
25022
:
1.
,
10000000
:
1.
})
def
testLearningRateUserProvidedDecayInfo
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
init_learning_rate
=
1.
,
learning_rate_decay_factor
=
0.5
,
num_epochs_per_decay
=
2
,
minimum_learning_rate
=
0.3750
,
batch_size
=
32
)
self
.
_test_learning_rate
(
params
,
{
0
:
1.
,
80071
:
1.
,
80072
:
0.5
,
160143
:
0.5
,
160144
:
0.375
,
10000000
:
0.375
})
def
testLearningRateUserProvidedZeroDecay
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
num_learning_rate_warmup_epochs
=
0
,
learning_rate_decay_factor
=
0.5
,
num_epochs_per_decay
=
0
,
minimum_learning_rate
=
0.3750
,
batch_size
=
32
)
with
self
.
assertRaises
(
ValueError
):
with
tf
.
Graph
().
as_default
():
# This will fail because params.learning_rate_decay_factor cannot be
# nonzero if params.num_epochs_per_decay is zero.
benchmark_cnn
.
BenchmarkCNN
(
params
).
_build_model
()
def
testLearningRateUserProvidedSchedule
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'trivial'
,
batch_size
=
32
,
piecewise_learning_rate_schedule
=
'1;3;.1;5;.01'
)
self
.
_test_learning_rate
(
params
,
{
0
:
1.
,
120108
:
1.
,
120109
:
0.1
,
200181
:
0.1
,
200182
:
0.01
,
100000000
:
0.01
})
def
testNumBatchesAndEpochs
(
self
):
params
=
benchmark_cnn
.
make_params
()
batches
,
epochs
=
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
10
,
100
)
self
.
assertEqual
(
batches
,
benchmark_cnn
.
_DEFAULT_NUM_BATCHES
)
self
.
assertAlmostEqual
(
epochs
,
float
(
benchmark_cnn
.
_DEFAULT_NUM_BATCHES
)
/
10
)
params
=
benchmark_cnn
.
make_params
(
num_batches
=
21
)
batches
,
epochs
=
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
25
,
50
)
self
.
assertEqual
(
batches
,
21
)
self
.
assertAlmostEqual
(
epochs
,
10.5
)
params
=
benchmark_cnn
.
make_params
(
num_epochs
=
3
)
batches
,
epochs
=
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
2
,
3
)
self
.
assertEqual
(
batches
,
5
)
self
.
assertAlmostEqual
(
epochs
,
10.
/
3.
)
params
=
benchmark_cnn
.
make_params
(
num_epochs
=
4
)
batches
,
epochs
=
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
2
,
3
)
self
.
assertEqual
(
batches
,
6
)
self
.
assertAlmostEqual
(
epochs
,
4
)
with
self
.
assertRaises
(
ValueError
):
params
=
benchmark_cnn
.
make_params
(
num_batches
=
100
,
num_epochs
=
100
)
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
1
,
1
)
def
_testEvalDuringTraining
(
self
,
params
,
expected_num_eval_batches_found
):
# The idea of this test is that all train images are black and all eval
# images are white. We pass the images through the TestModel, and ensure
# the outputs are as expected.
batch_size
=
params
.
batch_size
eval_batch_size
=
params
.
eval_batch_size
or
params
.
batch_size
class
TestModel
(
test_util
.
TestCNNModel
):
def
__init__
(
self
):
super
(
TestModel
,
self
).
__init__
()
self
.
depth
=
3
def
add_inference
(
self
,
cnn
):
if
cnn
.
phase_train
:
# This will allow us to test that 100 is only added during training
# and not during eval.
cnn
.
top_layer
+=
100
assert
cnn
.
top_layer
.
shape
[
0
]
==
batch_size
else
:
assert
cnn
.
top_layer
.
shape
[
0
]
==
eval_batch_size
# Reduce the image to a single number. The number should be (-1 + 100)
# during training and 1 during testing.
cnn
.
top_layer
=
tf
.
reshape
(
cnn
.
top_layer
,
(
cnn
.
top_layer
.
shape
[
0
],
-
1
))
cnn
.
top_layer
=
tf
.
reduce_mean
(
cnn
.
top_layer
,
axis
=
1
)
cnn
.
top_layer
=
tf
.
reshape
(
cnn
.
top_layer
,
(
cnn
.
top_layer
.
shape
[
0
],
1
,
1
,
1
))
cnn
.
top_size
=
1
trainable_vars
=
tf
.
trainable_variables
()
# The super method will compute image*A*B, where A=1 and B=2.
super
(
TestModel
,
self
).
add_inference
(
cnn
)
if
not
cnn
.
phase_train
:
# Assert no new variables were added, since they should be reused from
# training.
assert
len
(
trainable_vars
)
==
len
(
tf
.
trainable_variables
())
model
=
TestModel
()
dataset
=
datasets
.
ImagenetDataset
(
params
.
data_dir
)
logs
=
[]
bench_cnn
=
benchmark_cnn
.
BenchmarkCNN
(
params
,
model
=
model
,
dataset
=
dataset
)
with
test_util
.
monkey_patch
(
benchmark_cnn
,
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)):
bench_cnn
.
run
()
training_outputs
=
test_util
.
get_training_outputs_from_logs
(
logs
,
print_training_accuracy
=
False
)
self
.
assertEqual
(
len
(
training_outputs
),
params
.
num_batches
)
expected_training_output
=
(
-
1
+
100
)
*
1
*
2
for
training_output
in
training_outputs
:
self
.
assertEqual
(
training_output
.
loss
,
expected_training_output
)
eval_outputs
=
test_util
.
get_evaluation_outputs_from_logs
(
logs
)
self
.
assertTrue
(
eval_outputs
)
expected_eval_output
=
1
*
1
*
2
for
eval_output
in
eval_outputs
:
self
.
assertEqual
(
eval_output
.
top_1_accuracy
,
expected_eval_output
)
self
.
assertEqual
(
eval_output
.
top_5_accuracy
,
expected_eval_output
)
num_eval_batches_found
=
0
eval_batch_regex
=
re
.
compile
(
r
'^\d+\t[0-9.]+ examples/sec$'
)
for
log
in
logs
:
if
eval_batch_regex
.
match
(
log
):
num_eval_batches_found
+=
1
self
.
assertEqual
(
num_eval_batches_found
,
expected_num_eval_batches_found
)
def
testEvalDuringTraining
(
self
):
data_dir
=
test_util
.
create_black_and_white_images
()
base_params
=
test_util
.
get_params
(
'testEvalDuringTraining'
)
train_dir
=
base_params
.
train_dir
base_params
=
base_params
.
_replace
(
train_dir
=
None
,
print_training_accuracy
=
False
,
num_warmup_batches
=
0
,
num_batches
=
7
,
num_eval_batches
=
2
,
display_every
=
1
,
init_learning_rate
=
0
,
weight_decay
=
0
,
distortions
=
False
,
data_dir
=
data_dir
)
expected_num_eval_batches_found
=
(
base_params
.
num_eval_batches
*
(
base_params
.
num_batches
//
2
+
1
))
# Test --eval_during_training_every_n_steps
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_steps
=
2
,
variable_update
=
'parameter_server'
),
expected_num_eval_batches_found
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_steps
=
2
,
variable_update
=
'replicated'
),
expected_num_eval_batches_found
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_steps
=
2
,
variable_update
=
'replicated'
,
summary_verbosity
=
2
,
save_summaries_steps
=
2
,
datasets_use_prefetch
=
False
),
expected_num_eval_batches_found
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_steps
=
2
,
variable_update
=
'replicated'
,
use_fp16
=
True
,
train_dir
=
train_dir
,
eval_batch_size
=
base_params
.
batch_size
+
2
),
expected_num_eval_batches_found
)
# Test --eval_during_training_every_n_epochs
every_n_epochs
=
(
2
*
base_params
.
batch_size
*
base_params
.
num_gpus
/
datasets
.
IMAGENET_NUM_TRAIN_IMAGES
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_epochs
=
every_n_epochs
,
variable_update
=
'replicated'
),
expected_num_eval_batches_found
)
# Test --eval_during_training_at_specified_steps
list_steps
=
[
2
,
3
,
5
,
7
,
1000
]
num_eval_steps
=
1
+
sum
(
1
for
step
in
list_steps
if
step
<
base_params
.
num_batches
)
expected_num_eval_batches_found
=
(
base_params
.
num_eval_batches
*
num_eval_steps
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_at_specified_steps
=
list_steps
,
variable_update
=
'replicated'
),
expected_num_eval_batches_found
)
# Test --eval_during_training_at_specified_epochs
list_epochs
=
[(
step
*
base_params
.
batch_size
*
base_params
.
num_gpus
/
datasets
.
IMAGENET_NUM_TRAIN_IMAGES
)
for
step
in
list_steps
]
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_at_specified_epochs
=
list_epochs
,
variable_update
=
'replicated'
),
expected_num_eval_batches_found
)
# Test --eval_during_training_every_n_steps runs with synthetic data.
params
=
base_params
.
_replace
(
variable_update
=
'replicated'
,
data_dir
=
None
,
eval_during_training_every_n_steps
=
2
,
num_batches
=
2
)
benchmark_cnn
.
BenchmarkCNN
(
params
).
run
()
def
testEvalDuringTrainingNumEpochs
(
self
):
params
=
benchmark_cnn
.
make_params
(
batch_size
=
1
,
eval_batch_size
=
2
,
eval_during_training_every_n_steps
=
1
,
num_batches
=
30
,
num_eval_epochs
=
100
/
datasets
.
IMAGENET_NUM_VAL_IMAGES
)
bench_cnn
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
self
.
assertEqual
(
bench_cnn
.
num_batches
,
30
)
self
.
assertAlmostEqual
(
bench_cnn
.
num_epochs
,
30
/
datasets
.
IMAGENET_NUM_TRAIN_IMAGES
)
self
.
assertAlmostEqual
(
bench_cnn
.
num_eval_batches
,
50
)
self
.
assertAlmostEqual
(
bench_cnn
.
num_eval_epochs
,
100
/
datasets
.
IMAGENET_NUM_VAL_IMAGES
)
def
testEarlyStopping
(
self
):
params
=
benchmark_cnn
.
make_params
(
batch_size
=
2
,
display_every
=
1
,
num_batches
=
100
,
eval_during_training_every_n_steps
=
2
,
stop_at_top_1_accuracy
=
0.4
,
)
with
mock
.
patch
.
object
(
benchmark_cnn
.
BenchmarkCNN
,
'_eval_once'
,
side_effect
=
[(
0.1
,
0.1
),
(
0.5
,
0.5
),
(
0.2
,
0.2
)]
)
as
mock_eval_once
:
logs
=
[]
bench_cnn
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
with
test_util
.
monkey_patch
(
benchmark_cnn
,
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)):
bench_cnn
.
run
()
training_outputs
=
test_util
.
get_training_outputs_from_logs
(
logs
,
print_training_accuracy
=
False
)
# We should stop after the second evaluation, and we evaluate every 2
# steps. So there should be 2 * 2 = 4 training outputs.
self
.
assertEqual
(
len
(
training_outputs
),
4
)
self
.
assertEqual
(
mock_eval_once
.
call_count
,
2
)
def
testOutOfRangeErrorsAreNotIgnored
(
self
):
error_msg
=
'Fake OutOfRangeError error message'
with
mock
.
patch
.
object
(
benchmark_cnn
.
BenchmarkCNN
,
'benchmark_with_session'
,
side_effect
=
tf
.
errors
.
OutOfRangeError
(
None
,
None
,
error_msg
)):
with
self
.
assertRaisesRegex
(
RuntimeError
,
error_msg
):
benchmark_cnn
.
BenchmarkCNN
(
benchmark_cnn
.
make_params
()).
run
()
def
testInvalidFlags
(
self
):
params
=
benchmark_cnn
.
make_params
(
device
=
'cpu'
,
data_format
=
'NCHW'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
params
=
benchmark_cnn
.
make_params
(
use_fp16
=
True
,
fp16_vars
=
True
,
variable_update
=
'replicated'
,
all_reduce_spec
=
'nccl'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
# Automatic loss scaling is only supported for 'replicated', 'ps',
# and 'independent' variable_updates.
invalid_variable_updates
=
[
'distributed_replicated'
,
'distributed_all_reduce'
]
for
variable_update
in
invalid_variable_updates
:
params
=
benchmark_cnn
.
make_params
(
use_fp16
=
True
,
fp16_vars
=
True
,
fp16_enable_auto_loss_scale
=
True
,
variable_update
=
variable_update
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
# Automatic loss scaling is not supported for 'nccl'.
params
=
benchmark_cnn
.
make_params
(
use_fp16
=
True
,
fp16_vars
=
True
,
fp16_enable_auto_loss_scale
=
True
,
all_reduce_spec
=
'nccl'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
# Automatic loss scaling is not supported for 'staged_vars'.
params
=
benchmark_cnn
.
make_params
(
use_fp16
=
True
,
fp16_vars
=
True
,
fp16_enable_auto_loss_scale
=
True
,
staged_vars
=
True
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
def
testMakeParams
(
self
):
default_params
=
benchmark_cnn
.
make_params
()
self
.
assertEqual
(
default_params
.
model
,
flags
.
param_specs
[
'model'
].
default_value
)
params
=
benchmark_cnn
.
make_params
(
model
=
'foo'
)
self
.
assertEqual
(
params
.
model
,
'foo'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
make_params
(
job_name
=
'foo'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
make_params
(
gpu_memory_frac_for_testing
=-
1.
)
class VariableUpdateTest(tf.test.TestCase):
  """Tests that variables are updated correctly.

  These tests use a very simple deterministic model. For example, some tests
  use the model

    loss = image * A * B

  where image is a 1x1 images (with a single scalar value), and A and B are
  scalar variables. Tests will run tf_cnn_benchmarks with such a model, on a
  sequence of scalar images, and assert that the losses are the correct value.
  Since the losses depend on the variables, this indirectly tests variables
  are updated correctly.
  """

  def setUp(self):
    super(VariableUpdateTest, self).setUp()
    _check_has_gpu()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  def _get_benchmark_cnn_losses(self, inputs, params):
    """Returns the losses of BenchmarkCNN on the given inputs and params."""
    logs = []
    model = test_util.TestCNNModel()
    with test_util.monkey_patch(
        benchmark_cnn,
        log_fn=test_util.print_and_add_to_list(logs),
        LOSS_AND_ACCURACY_DIGITS_TO_SHOW=15):
      bench = benchmark_cnn.BenchmarkCNN(
          params, dataset=test_util.TestDataSet(), model=model)
      # The test model does not use labels when computing loss, so the label
      # values do not matter as long as it's the right shape.
      labels = np.array([1] * inputs.shape[0])
      bench.input_preprocessor.set_fake_data(inputs, labels)
      if bench.eval_input_preprocessor:
        bench.eval_input_preprocessor.set_fake_data(inputs, labels)
      bench.run()
    outputs = test_util.get_training_outputs_from_logs(
        logs, params.print_training_accuracy)
    return [x.loss for x in outputs]

  def _test_variable_update(self, params):
    """Tests variables are updated correctly when the given params are used.

    A BenchmarkCNN is created with a TestCNNModel, and is run with some scalar
    images. The losses are then compared with the losses obtained with
    TestCNNModel().manually_compute_losses()

    Args:
      params: a Params tuple used to create BenchmarkCNN.
    """
    inputs = test_util.get_fake_var_update_inputs()
    actual_losses = self._get_benchmark_cnn_losses(inputs, params)
    expected_losses, = test_util.TestCNNModel().manually_compute_losses(
        inputs, 1, params)
    # fp16 arithmetic is much less precise, so relax the tolerance.
    rtol = 3e-2 if params.use_fp16 else 1e-5
    self.assertAllClose(actual_losses[:len(expected_losses)],
                        expected_losses, rtol=rtol, atol=0.)

  def _test_variable_updates(self, params,
                             var_updates=('parameter_server', 'replicated')):
    """Runs _test_variable_update for each variable_update strategy."""
    for var_update in var_updates:
      self._test_variable_update(params._replace(variable_update=var_update))

  def testDefault(self):
    params = test_util.get_var_update_params()
    self._test_variable_updates(params)

  # For some reason, this test doesn't always pass
  # def testCpuAsDevice(self):
  #   params = test_util.get_var_update_params()._replace(
  #       device='cpu',
  #       data_format='NHWC')  # NHWC required when --device=cpu
  #   self._test_variable_updates(params)

  def testCpuAsLocalParamDevice(self):
    params = test_util.get_var_update_params()._replace(
        local_parameter_device='cpu')
    self._test_variable_updates(params)

  def testFp16(self):
    params = test_util.get_var_update_params()._replace(use_fp16=True)
    self._test_variable_updates(params)

  def testMomentum(self):
    params = test_util.get_var_update_params()._replace(optimizer='momentum')
    self._test_variable_updates(params)

  def testRmsprop(self):
    params = test_util.get_var_update_params()._replace(optimizer='rmsprop')
    self._test_variable_updates(params)

  def testNoLayers(self):
    params = test_util.get_var_update_params()._replace(use_tf_layers=False)
    self._test_variable_updates(params)

  def testVariousAllReduceSpecs(self):
    # We do not test xring, because it requires all Variables to have at least
    # two elements.
    params = test_util.get_var_update_params()._replace(
        all_reduce_spec='pscpu')
    self._test_variable_updates(params, var_updates=('replicated',))
    params = params._replace(all_reduce_spec='psgpu')
    self._test_variable_updates(params, var_updates=('replicated',))
    # TODO(b/80125832): Enable nccl in tests
    # params = params._replace(all_reduce_spec='nccl',
    #                          compact_gradient_transfer=False)
    # self._test_variable_updates(params, var_updates=('replicated',))

  def testPrintBaseLoss(self):
    params = test_util.get_var_update_params()._replace(
        loss_type_to_report='base_loss')
    self._test_variable_updates(params)

  def testSingleL2LossOp(self):
    params = test_util.get_var_update_params()._replace(
        single_l2_loss_op=True)
    self._test_variable_updates(params)

  def testResourceVars(self):
    params = test_util.get_var_update_params()._replace(
        use_resource_vars=True)
    self._test_variable_updates(params)

  def testEvalDuringTrainingEveryNSteps(self):
    # TODO(reedwm): Test that the eval results are correct. This only tests
    # that training results are correct.
    params = test_util.get_var_update_params()._replace(
        eval_during_training_every_n_steps=1)
    self._test_variable_updates(params, var_updates=('replicated',))
class VariableMgrLocalReplicatedTest(tf.test.TestCase):
  """Tests gradient aggregation in the local replicated variable manager."""

  def _test_grad_aggregation_with_var_mgr(self, variable_mgr, num_towers,
                                          num_vars, deferred_grads):
    """Feeds known per-tower gradients and checks the aggregated sums.

    Tower i's gradient for variable j is the constant num_towers * i + j, so
    every tower's aggregated gradient for variable j must equal
    sum_i (num_towers * i + j).
    """
    tower_devices = ['/gpu:%d' % i for i in range(num_towers)]
    tower_grads = []
    expected_sums = [0.] * num_vars
    for i, tower_device in enumerate(tower_devices):
      with tf.device(tower_device):
        grad_vars = []
        for j in range(num_vars):
          n = num_towers * i + j
          grad_vars.append((tf.constant(n, dtype=tf.float32),
                            tf.Variable(n, dtype=tf.float32)))
          expected_sums[j] += n
      tower_grads.append(grad_vars)

    _, agg_device_grads = variable_mgr.preprocess_device_grads(tower_grads)
    expected_device_grads = []
    for i in range(num_towers):
      expected_grad_vars = []
      for j in range(num_vars):
        expected_grad_and_var = [expected_sums[j], num_towers * i + j]
        if isinstance(agg_device_grads[i][j], tuple):
          # agg_device_grads[i][j] can be a list or tuple.
          expected_grad_and_var = tuple(expected_grad_and_var)
        expected_grad_vars.append(expected_grad_and_var)
      if isinstance(agg_device_grads[i], tuple):
        # agg_device_grads[i] can be a list or tuple.
        expected_grad_vars = tuple(expected_grad_vars)
      expected_device_grads.append(expected_grad_vars)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
      sess.run(tf.initialize_all_variables())
      sess.run(variable_mgr._warmup_ops)
      if deferred_grads:
        # With deferred grads, the result of a session run is always the
        # summed gradients from the previous session run.
        sess.run(agg_device_grads)
        feed_dict = {g: 0 for grad_vars in tower_grads for g, _ in grad_vars}
        agg_device_grads_ = sess.run(agg_device_grads, feed_dict)
      else:
        agg_device_grads_ = sess.run(agg_device_grads)
    self.assertEqual(agg_device_grads_, expected_device_grads)

  def _test_grad_aggregation(self, params, num_vars):
    """Builds a BenchmarkCNN for params and exercises its variable manager."""
    bench = benchmark_cnn.BenchmarkCNN(params)
    deferred_grads = (params.variable_consistency == 'relaxed')
    self._test_grad_aggregation_with_var_mgr(
        bench.variable_mgr, bench.num_gpus, num_vars, deferred_grads)

  def test_grad_aggregation(self):
    """Sweeps gradient aggregation over repacking/consistency/spec combos."""
    base_params = benchmark_cnn.make_params(num_gpus=10,
                                            variable_update='replicated',
                                            use_fp16=True)
    self._test_grad_aggregation(base_params, 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3), 10)
    self._test_grad_aggregation(
        base_params._replace(variable_consistency='relaxed'), 10)
    self._test_grad_aggregation(
        base_params._replace(compact_gradient_transfer=False), 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             variable_consistency='relaxed'), 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             compact_gradient_transfer=False), 10)
    self._test_grad_aggregation(
        base_params._replace(variable_consistency='relaxed',
                             compact_gradient_transfer=False), 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             variable_consistency='relaxed',
                             compact_gradient_transfer=False), 10)
    self._test_grad_aggregation(
        base_params._replace(num_gpus=8, hierarchical_copy=True), 10)
    # TODO(b/80125832): Enable nccl in tests
    # params = base_params._replace(all_reduce_spec='nccl',
    #                               compact_gradient_transfer=False,
    #                               # For some reason, this test freezes when
    #                               # num_gpus=10
    #                               num_gpus=8)
    # self._test_grad_aggregation(params, 10)
    self._test_grad_aggregation(
        base_params._replace(all_reduce_spec='pscpu'), 10)
    self._test_grad_aggregation(
        base_params._replace(num_gpus=8,
                             gradient_repacking=3,
                             variable_consistency='relaxed',
                             hierarchical_copy=True), 10)
    # TODO(b/80125832): Enable nccl in tests
    # params = base_params._replace(num_gpus=8,
    #                               gradient_repacking=3,
    #                               variable_consistency='relaxed',
    #                               all_reduce_spec='nccl',
    #                               compact_gradient_transfer=False)
    # self._test_grad_aggregation(params, 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             variable_consistency='relaxed',
                             all_reduce_spec='pscpu'), 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             variable_consistency='relaxed',
                             all_reduce_spec='xring'), 10)
if __name__ == '__main__':
  # These benchmarks predate TF2; run the test suite under TF1 semantics.
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for CNN benchmarks."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
sys
import
threading
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
def tensorflow_version_tuple():
  """Returns the TensorFlow version as a (major, minor, patch) tuple.

  major and minor are ints; patch is left as a string, since it may carry a
  non-numeric suffix (e.g. '0-rc1').

  Returns:
    A (int, int, str) tuple parsed from tf.__version__.
  """
  v = tf.__version__
  # Split at most twice so versions with extra dotted components (e.g.
  # nightly builds like '2.5.0.dev20201130') do not raise on unpacking;
  # everything after the second dot is folded into `patch`.
  major, minor, patch = v.split('.', 2)
  return (int(major), int(minor), patch)
def tensorflow_version():
  """Returns the TF version encoded as a single int: major * 1000 + minor."""
  major, minor = tensorflow_version_tuple()[:2]
  return major * 1000 + minor
def log_fn(log):
  """Writes `log` to stdout, followed by a newline, flushing immediately."""
  sys.stdout.write(str(log) + '\n')
  sys.stdout.flush()
def roll_numpy_batches(array, batch_size, shift_ratio):
  """Moves a proportion of batches from start to the end of the array.

  This function moves a proportion of batches, specified by `shift_ratio`,
  from the start of the array to the end. The number of batches moved is
  rounded down to the nearest integer. For example,

  ```
  roll_numpy_batches([1, 2, 3, 4, 5, 6], 2, 0.34) == [3, 4, 5, 6, 1, 2]
  ```

  Args:
    array: A Numpy array whose first dimension is the batch dimension.
    batch_size: The batch size.
    shift_ratio: Proportion of batches to move from the start of the array to
      the end of the array.
  Returns:
    A new Numpy array, with a proportion of the batches at the start of
    `array` moved to the end.
  """
  num_items = array.shape[0]
  assert num_items % batch_size == 0
  num_batches = num_items // batch_size
  # Round the batch count down, then convert back to an item count.
  items_to_shift = int(num_batches * shift_ratio) * batch_size
  return np.roll(array, -items_to_shift, axis=0)
# For Python 2.7 compatibility, we do not use threading.Barrier.
class Barrier(object):
  """Implements a lightweight Barrier.

  Useful for synchronizing a fixed number of threads at known synchronization
  points. Threads block on 'wait()' and simultaneously return once they have
  all made that call.

  # Implementation adopted from boost/thread/barrier.hpp
  """

  def __init__(self, parties):
    """Create a barrier, initialised to 'parties' threads."""
    self.cond = threading.Condition(threading.Lock())
    self.parties = parties
    # Indicates the number of waiting parties.
    self.waiting = 0
    # generation is needed to deal with spurious wakeups. If self.cond.wait()
    # wakes up for other reasons, generation will force it go back to wait().
    self.generation = 0
    # Once broken, wait() becomes a no-op; set by abort().
    self.broken = False

  def wait(self):
    """Wait for the barrier."""
    with self.cond:
      # Check if the barrier has been disabled or not.
      if self.broken:
        return
      # Remember which generation this waiter belongs to; the last arriving
      # thread bumps the generation, releasing everyone who saw the old value.
      gen = self.generation
      self.waiting += 1
      if self.waiting == self.parties:
        # Last thread to arrive: reset the count and wake all waiters.
        self.waiting = 0
        self.generation += 1
        self.cond.notify_all()
      # loop because of spurious wakeups
      while gen == self.generation:
        self.cond.wait()

  # TODO(huangyp): Remove this method once we find a way to know which step
  # is the last barrier.
  def abort(self):
    """Clear existing barrier and disable this barrier."""
    with self.cond:
      if self.waiting > 0:
        # Release any currently blocked waiters by advancing the generation.
        self.generation += 1
        self.cond.notify_all()
      self.broken = True
class ImageProducer(object):
  """An image producer that puts images into a staging area periodically.

  This class is useful for periodically running a set of ops, `put_ops` on a
  different thread every `batch_group_size` steps.

  The notify_image_consumption() method is used to increment an internal
  counter so that every `batch_group_size` times it is called, `put_ops` is
  executed. A barrier is placed so that notify_image_consumption() will block
  until the previous call to `put_ops` has been executed.

  The start() method is used to start the thread that runs `put_ops`.

  The done() method waits until the last put_ops is executed and stops the
  thread.

  The purpose of this class is to fill an image input pipeline every
  `batch_group_size` steps. Suppose `put_ops` supplies `batch_group_size`
  images to the input pipeline when run, and that every step, 1 batch of
  images is consumed. Then, by calling notify_image_consumption() every step,
  images are supplied to the input pipeline at the same amount they are
  consumed.

  Example usage:
  ```
  put_ops = ...  # Enqueues `batch_group_size` batches to a StagingArea
  get_op = ...   # Dequeues 1 batch, and does some operations on it
  batch_group_size = 4
  with tf.Session() as sess:
    image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size)
    image_producer.start()
    for _ in range(100):
      sess.run(get_op)
      image_producer.notify_image_consumption()
  ```
  """

  def __init__(self, sess, put_ops, batch_group_size, use_python32_barrier):
    self.sess = sess
    self.num_gets = 0
    self.put_ops = put_ops
    self.batch_group_size = batch_group_size
    self.done_event = threading.Event()
    if (use_python32_barrier and
        sys.version_info[0] == 3 and sys.version_info[1] >= 2):
      self.put_barrier = threading.Barrier(2)
    else:
      self.put_barrier = Barrier(2)

  def _should_put(self):
    # True on every batch_group_size-th consumption notification.
    return (self.num_gets + 1) % self.batch_group_size == 0

  def done(self):
    """Stop the image producer."""
    self.done_event.set()
    self.put_barrier.abort()
    self.thread.join()

  def start(self):
    """Start the image producer."""
    self.sess.run([self.put_ops])
    self.thread = threading.Thread(target=self._loop_producer)
    # Set daemon to true to allow Ctrl + C to terminate all threads.
    self.thread.daemon = True
    self.thread.start()

  def notify_image_consumption(self):
    """Increment the counter of image_producer by 1.

    This should only be called by the main thread that consumes images and
    runs the model computation. One batch of images should be consumed between
    calling start() and the first call to this method. Then, one batch of
    images should be consumed between any two successive calls to this method.
    """
    if self._should_put():
      self.put_barrier.wait()
    self.num_gets += 1

  def _loop_producer(self):
    # Fix: Event.isSet() is a deprecated camelCase alias (deprecated since
    # Python 3.10); is_set() is the supported spelling and behaves identically.
    while not self.done_event.is_set():
      self.sess.run([self.put_ops])
      self.put_barrier.wait()
class BaseClusterManager(object):
  """The manager for the cluster of servers running the benchmark."""

  def __init__(self, params):
    """Builds a tf.train.ClusterSpec from the host lists in `params`."""
    workers = params.worker_hosts.split(',')
    ps = params.ps_hosts.split(',') if params.ps_hosts else []
    cluster_def = {'worker': workers}
    if ps:
      cluster_def['ps'] = ps
    self._cluster_spec = tf.train.ClusterSpec(cluster_def)

  def get_target(self):
    """Returns a target to be passed to tf.Session()."""
    raise NotImplementedError('get_target must be implemented by subclass')

  def join_server(self):
    raise NotImplementedError('join must be implemented by subclass')

  def get_cluster_spec(self):
    return self._cluster_spec

  def num_workers(self):
    return len(self._cluster_spec.job_tasks('worker'))

  def num_ps(self):
    if 'ps' not in self._cluster_spec.jobs:
      return 0
    return len(self._cluster_spec.job_tasks('ps'))
class GrpcClusterManager(BaseClusterManager):
  """A cluster manager for a cluster networked with gRPC."""

  def __init__(self, params, config_proto):
    """Resolves the session target; starts a tf.train.Server unless controller."""
    super(GrpcClusterManager, self).__init__(params)
    if params.job_name != 'controller':
      # Workers and PS tasks run an in-process server and target themselves.
      self._server = tf.train.Server(self._cluster_spec,
                                     job_name=params.job_name,
                                     task_index=params.task_index,
                                     config=config_proto,
                                     protocol=params.server_protocol)
      self._target = self._server.target
    else:
      # The controller connects to the first worker instead of serving.
      self._target = 'grpc://%s' % self._cluster_spec.job_tasks('worker')[0]

  def get_target(self):
    return self._target

  def join_server(self):
    return self._server.join()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util_test.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_cnn_benchmarks.cnn_util."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
threading
import
time
import
tensorflow.compat.v1
as
tf
import
cnn_util
class CnnUtilBarrierTest(tf.test.TestCase):

  def testBarrier(self):
    num_tasks = 20
    num_waits = 4
    barrier = cnn_util.Barrier(num_tasks)
    workers = []
    sync_matrix = []
    for _ in range(num_tasks):
      sync_times = [0] * num_waits
      worker = threading.Thread(
          target=self._run_task, args=(barrier, sync_times))
      worker.start()
      workers.append(worker)
      sync_matrix.append(sync_times)
    for worker in workers:
      worker.join()
    for wait_index in range(num_waits - 1):
      # Max of times at iteration i < min of times at iteration i + 1
      latest_this_round = max(row[wait_index] for row in sync_matrix)
      earliest_next_round = min(row[wait_index + 1] for row in sync_matrix)
      self.assertLessEqual(latest_this_round, earliest_next_round)

  def _run_task(self, barrier, sync_times):
    """Records a timestamp, then blocks on the barrier, once per slot."""
    for wait_index in range(len(sync_times)):
      sync_times[wait_index] = time.time()
      barrier.wait()

  def testBarrierAbort(self):
    num_tasks = 2
    num_waits = 1
    sync_times = [0] * num_waits
    barrier = cnn_util.Barrier(num_tasks)
    worker = threading.Thread(
        target=self._run_task, args=(barrier, sync_times))
    worker.start()
    barrier.abort()
    # thread won't be blocked by done barrier.
    worker.join()
class ImageProducerTest(tf.test.TestCase):
  """Tests cnn_util.ImageProducer against a simulated staging area."""

  def _slow_tensorflow_op(self):
    """Returns a TensorFlow op that takes approximately 0.1s to complete."""
    def slow_func(v):
      time.sleep(0.1)
      return v
    return tf.py_func(slow_func, [tf.constant(0.)], tf.float32).op

  def _test_image_producer(self, batch_group_size, put_slower_than_get):
    # We use the variable x to simulate a staging area of images. x represents
    # the number of batches in the staging area.
    x = tf.Variable(0, dtype=tf.int32)
    # Make one side artificially slow so both producer-bound and
    # consumer-bound schedules are exercised.
    if put_slower_than_get:
      put_dep = self._slow_tensorflow_op()
      get_dep = tf.no_op()
    else:
      put_dep = tf.no_op()
      get_dep = self._slow_tensorflow_op()
    with tf.control_dependencies([put_dep]):
      put_op = x.assign_add(batch_group_size, use_locking=True)
    with tf.control_dependencies([get_dep]):
      get_op = x.assign_sub(1, use_locking=True)
    with self.test_session() as sess:
      sess.run(tf.variables_initializer([x]))
      image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size,
                                              use_python32_barrier=False)
      image_producer.start()
      for _ in range(5 * batch_group_size):
        sess.run(get_op)
        # We assert x is nonnegative, to ensure image_producer never causes
        # an unstage op to block. We assert x is at most 2 * batch_group_size,
        # to ensure it doesn't use too much memory by storing too many batches
        # in the staging area.
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)
        image_producer.notify_image_consumption()
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)
      image_producer.done()
      # Give the producer thread a moment to finish its final put.
      time.sleep(0.1)
      self.assertGreaterEqual(sess.run(x), 0)
      self.assertLessEqual(sess.run(x), 2 * batch_group_size)

  def test_image_producer(self):
    # Cover several batch group sizes in both timing regimes.
    self._test_image_producer(1, False)
    self._test_image_producer(1, True)
    self._test_image_producer(2, False)
    self._test_image_producer(2, True)
    self._test_image_producer(3, False)
    self._test_image_producer(3, True)
    self._test_image_producer(8, False)
    self._test_image_producer(8, True)
if __name__ == '__main__':
  # These tests use TF1-style graph/session APIs, so force v1 behavior.
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/coco_metric.py
0 → 100644
View file @
f0d87682
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""COCO-style evaluation metrics.
Forked from reference model implementation.
COCO API: github.com/cocodataset/cocoapi/
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
atexit
import
tempfile
from
absl
import
flags
import
numpy
as
np
from
pycocotools.coco
import
COCO
from
pycocotools.cocoeval
import
COCOeval
import
six
import
tensorflow.compat.v1
as
tf
import
mlperf
import
ssd_constants
FLAGS = flags.FLAGS

# https://github.com/cocodataset/cocoapi/issues/49
# pycocotools references the Python 2 `unicode` builtin; on Python 3 alias it
# to `str` so COCO annotation loading does not raise NameError.
if six.PY3:
  import pycocotools.coco
  pycocotools.coco.unicode = str
def async_eval_runner(queue_predictions, queue_results, val_json_file):
  """Load intermediate eval results and get COCO metrics."""
  while True:
    message = queue_predictions.get()
    # 'STOP' is the poison pill telling this worker loop to shut down.
    if message == 'STOP':
      break
    step, predictions = message
    queue_results.put((step, compute_map(predictions, val_json_file)))
def compute_map(predictions, val_json_file):
  """Use model predictions to compute mAP.

  Args:
    predictions: a list of tuples returned by decoded_predictions function,
      each containing the following elements:
      image source_id, box coordinates in XYWH order, probability score, label
    val_json_file: path to COCO annotation file
  Returns:
    A dictionary that maps all COCO metrics (keys) to their values
  """
  if val_json_file.startswith("gs://"):
    # COCO() reads from the local filesystem, so stage a GCS file locally.
    _, local_val_json = tempfile.mkstemp(suffix=".json")
    tf.gfile.Remove(local_val_json)
    tf.gfile.Copy(val_json_file, local_val_json)
    atexit.register(tf.gfile.Remove, local_val_json)
  else:
    local_val_json = val_json_file

  coco_gt = COCO(local_val_json)
  coco_dt = coco_gt.loadRes(np.array(predictions))
  evaluator = COCOeval(coco_gt, coco_dt, iouType='bbox')
  evaluator.evaluate()
  evaluator.accumulate()
  evaluator.summarize()
  print("Current AP: {:.5f}".format(evaluator.stats[0]))
  metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
                  'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']

  # Prefix with "COCO" to group in TensorBoard.
  return {"COCO/" + name: stat
          for name, stat in zip(metric_names, evaluator.stats)}
def calc_iou(target, candidates):
  """Computes IoU between one target box and each of N candidate boxes.

  Boxes are in (y_min, x_min, y_max, x_max)-style corner order: the first two
  columns are the top-left corner and the last two the bottom-right corner.

  Args:
    target: 1-D array of 4 box coordinates.
    candidates: (N, 4) array of candidate box coordinates.

  Returns:
    1-D array of N IoU values in [0, 1].
  """
  target_tiled = np.tile(target[np.newaxis, :], (candidates.shape[0], 1))
  # Left Top & Right Bottom of the intersection rectangle.
  lt = np.maximum(target_tiled[:, :2], candidates[:, :2])
  rb = np.minimum(target_tiled[:, 2:], candidates[:, 2:])

  delta = np.maximum(rb - lt, 0)
  intersect = delta[:, 0] * delta[:, 1]

  # Bug fix: the original computed delta1 and delta2 with the identical
  # expression `target_tiled[:, 2:] - candidates[:, :2]`, which is neither
  # box's extent, so the union term was wrong. Correct areas are each box's
  # own (bottom-right - top-left) extent.
  delta1 = target_tiled[:, 2:] - target_tiled[:, :2]
  area1 = delta1[:, 0] * delta1[:, 1]
  delta2 = candidates[:, 2:] - candidates[:, :2]
  area2 = delta2[:, 0] * delta2[:, 1]

  # IoU = intersection / union, with union = area1 + area2 - intersection.
  iou = intersect / (area1 + area2 - intersect)
  return iou
# TODO(haoyuzhang): Rewrite this NumPy based implementation to TensorFlow based
# implementation under ssd_model.py accuracy_function.
def decode_predictions(labels_and_predictions):
  """Decode predictions and remove unused boxes and labels."""
  decoded = []
  for example in labels_and_predictions:
    image_id = int(example[ssd_constants.SOURCE_ID])
    boxes = example[ssd_constants.PRED_BOXES]
    scores = example[ssd_constants.PRED_SCORES]

    locs, labels, probs = decode_single(
        boxes, scores, ssd_constants.OVERLAP_CRITERIA,
        ssd_constants.MAX_NUM_EVAL_BOXES, ssd_constants.MAX_NUM_EVAL_BOXES)

    raw_height, raw_width, _ = example[ssd_constants.RAW_SHAPE]
    for loc, label, prob in zip(locs, labels, probs):
      # Ordering convention differs, hence [1], [0] rather than [0], [1]
      left = loc[1] * raw_width
      top = loc[0] * raw_height
      width = (loc[3] - loc[1]) * raw_width
      height = (loc[2] - loc[0]) * raw_height
      decoded.append([image_id, left, top, width, height, prob,
                      ssd_constants.CLASS_INV_MAP[label]])

  mlperf.logger.log(key=mlperf.tags.NMS_THRESHOLD,
                    value=ssd_constants.OVERLAP_CRITERIA)
  mlperf.logger.log(key=mlperf.tags.NMS_MAX_DETECTIONS,
                    value=ssd_constants.MAX_NUM_EVAL_BOXES)
  return decoded
def decode_single(bboxes_in, scores_in, criteria, max_output, max_num=200):
  """Per-image greedy non-maximum suppression over raw SSD detections.

  Args:
    bboxes_in: (num_boxes, 4) array of box coordinates.
    scores_in: (num_boxes, num_classes) array of per-class scores; class 0
      is treated as background and skipped.
    criteria: IoU threshold; boxes overlapping a kept box at or above this
      value are suppressed.
    max_output: maximum number of detections returned across all classes.
    max_num: per-class cap on the highest-scoring candidates fed to NMS.

  Returns:
    A (boxes, labels, scores) tuple of the surviving detections, or dummy
    single-entry arrays when nothing scores above ssd_constants.MIN_SCORE.
  """
  # Reference to https://github.com/amdegroot/ssd.pytorch
  bboxes_out = []
  scores_out = []
  labels_out = []

  for i, score in enumerate(np.split(scores_in, scores_in.shape[1], 1)):
    score = np.squeeze(score, 1)

    # skip background
    if i == 0:
      continue

    mask = score > ssd_constants.MIN_SCORE
    if not np.any(mask):
      continue

    bboxes, score = bboxes_in[mask, :], score[mask]

    # Fix: removed the unused local `score_sorted = score[score_idx_sorted]`
    # (it was computed and never read).
    score_idx_sorted = np.argsort(score)
    # Keep only the top max_num candidates; ascending order, best at the end.
    score_idx_sorted = score_idx_sorted[-max_num:]
    candidates = []

    # perform non-maximum suppression
    while len(score_idx_sorted):
      idx = score_idx_sorted[-1]
      bboxes_sorted = bboxes[score_idx_sorted, :]
      bboxes_idx = bboxes[idx, :]
      iou = calc_iou(bboxes_idx, bboxes_sorted)
      # Drop every candidate (including idx itself, IoU == 1) that overlaps
      # the current best too much, then keep the best.
      score_idx_sorted = score_idx_sorted[iou < criteria]
      candidates.append(idx)

    bboxes_out.append(bboxes[candidates, :])
    scores_out.append(score[candidates])
    labels_out.extend([i] * len(candidates))

  if len(scores_out) == 0:
    tf.logging.info("No objects detected. Returning dummy values.")
    return (
        np.zeros(shape=(1, 4), dtype=np.float32),
        np.zeros(shape=(1,), dtype=np.int32),
        np.ones(shape=(1,), dtype=np.float32) * ssd_constants.DUMMY_SCORE,
    )

  bboxes_out = np.concatenate(bboxes_out, axis=0)
  scores_out = np.concatenate(scores_out, axis=0)
  labels_out = np.array(labels_out)

  # Keep only the max_output highest-scoring detections overall.
  max_ids = np.argsort(scores_out)[-max_output:]
  return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/constants.py
0 → 100644
View file @
f0d87682
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Constants used in tf_cnn_benchmarks."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
enum
import
Enum
# Results fetched with this prefix will not be reduced. Instead, they will be
# passed as matrices to model's postprocess function.
UNREDUCED_ACCURACY_OP_PREFIX = "tensor:"

# Eval result values with this name prefix will be included in summary.
SIMPLE_VALUE_RESULT_PREFIX = "simple_value:"
class BenchmarkMode(object):
  """Benchmark running mode.

  Plain string constants (not an Enum) naming what tf_cnn_benchmarks runs.
  """
  TRAIN = "training"
  EVAL = "evaluation"
  TRAIN_AND_EVAL = "training + evaluation"
  FORWARD_ONLY = "forward only"
class NetworkTopology(str, Enum):
  """Network topology describes how multiple GPUs are inter-connected.

  Subclasses str so members compare equal to (and serialize as) their
  string values.
  """
  # DGX-1 uses hybrid cube mesh topology with the following device peer to peer
  # matrix:
  # DMA: 0 1 2 3 4 5 6 7
  # 0:   Y Y Y Y Y N N N
  # 1:   Y Y Y Y N Y N N
  # 2:   Y Y Y Y N N Y N
  # 3:   Y Y Y Y N N N Y
  # 4:   Y N N N Y Y Y Y
  # 5:   N Y N N Y Y Y Y
  # 6:   N N Y N Y Y Y Y
  # 7:   N N N Y Y Y Y Y
  DGX1 = "dgx1"

  # V100 in GCP are connected with the following device peer to peer matrix.
  # In this topology, bandwidth of the connection depends on if it uses NVLink
  # or PCIe link.
  # DMA: 0 1 2 3 4 5 6 7
  # 0:   Y Y Y Y N Y N N
  # 1:   Y Y Y Y N N N N
  # 2:   Y Y Y Y N N N Y
  # 3:   Y Y Y Y N N N N
  # 4:   N N N N Y Y Y Y
  # 5:   Y N N N Y Y Y Y
  # 6:   N N N N Y Y Y Y
  # 7:   N N Y N Y Y Y Y
  GCP_V100 = "gcp_v100"
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/convnet_builder.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""CNN builder."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
defaultdict
import
contextlib
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
# pylint: disable=g-direct-tensorflow-import
import
mlperf
from
tensorflow.python.layers
import
convolutional
as
conv_layers
from
tensorflow.python.layers
import
core
as
core_layers
from
tensorflow.python.layers
import
normalization
as
normalization_layers
from
tensorflow.python.layers
import
pooling
as
pooling_layers
from
tensorflow.python.training
import
moving_averages
# Maps a data-format string to the index of the channel axis in a 4-D tensor
# of that layout.
_data_format_to_channel_axis = {'NCHW': 1, 'NHWC': 3}
class
ConvNetBuilder
(
object
):
"""Builder of cnn net."""
  def __init__(self,
               input_op,
               input_nchan,
               phase_train,
               use_tf_layers,
               data_format='NCHW',
               dtype=tf.float32,
               variable_dtype=tf.float32):
    """Initializes the builder.

    Args:
      input_op: tensor the first layer is built on top of.
      input_nchan: number of channels of `input_op`.
      phase_train: bool; True when building the training graph.
      use_tf_layers: if True, use tf.layers implementations instead of the
        raw tf.nn ops.
      data_format: 'NCHW' or 'NHWC'.
      dtype: dtype of activations/computation.
      variable_dtype: dtype variables are stored as.
    """
    # top_layer/top_size track the current output tensor and its channel
    # count as layers are stacked.
    self.top_layer = input_op
    self.top_size = input_nchan
    self.phase_train = phase_train
    self.use_tf_layers = use_tf_layers
    self.data_format = data_format
    self.dtype = dtype
    self.variable_dtype = variable_dtype
    # Per-layer-type counters used to generate unique layer names.
    self.counts = defaultdict(lambda: 0)
    self.use_batch_norm = False
    self.batch_norm_config = {}  # 'decay': 0.997, 'scale': True}
    self.channel_pos = ('channels_last'
                        if data_format == 'NHWC' else 'channels_first')
    # Auxiliary tower state (e.g. Inception aux classifier arm).
    self.aux_top_layer = None
    self.aux_top_size = 0
  def get_custom_getter(self):
    """Returns a custom getter that this class's methods must be called under.

    All methods of this class must be called under a variable scope that was
    passed this custom getter. Example:

    ```python
    network = ConvNetBuilder(...)
    with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
      network.conv(...)
      # Call more methods of network here
    ```

    Currently, this custom getter only does anything if self.use_tf_layers is
    True. In that case, it causes variables to be stored as dtype
    self.variable_dtype, then casted to the requested dtype, instead of
    directly storing the variable as the requested dtype.
    """
    def inner_custom_getter(getter, *args, **kwargs):
      """Custom getter that forces variables to have type self.variable_dtype."""
      if not self.use_tf_layers:
        return getter(*args, **kwargs)
      requested_dtype = kwargs['dtype']
      if not (requested_dtype == tf.float32 and
              self.variable_dtype == tf.float16):
        # Only change the variable dtype if doing so does not decrease variable
        # precision.
        kwargs['dtype'] = self.variable_dtype
      var = getter(*args, **kwargs)
      # This if statement is needed to guard the cast, because batch norm
      # assigns directly to the return value of this custom getter. The cast
      # makes the return value not a variable so it cannot be assigned. Batch
      # norm variables are always in fp32 so this if statement is never
      # triggered for them.
      if var.dtype.base_dtype != requested_dtype:
        var = tf.cast(var, requested_dtype)
      return var
    return inner_custom_getter
@
contextlib
.
contextmanager
def
switch_to_aux_top_layer
(
self
):
"""Context that construct cnn in the auxiliary arm."""
if
self
.
aux_top_layer
is
None
:
raise
RuntimeError
(
'Empty auxiliary top layer in the network.'
)
saved_top_layer
=
self
.
top_layer
saved_top_size
=
self
.
top_size
self
.
top_layer
=
self
.
aux_top_layer
self
.
top_size
=
self
.
aux_top_size
yield
self
.
aux_top_layer
=
self
.
top_layer
self
.
aux_top_size
=
self
.
top_size
self
.
top_layer
=
saved_top_layer
self
.
top_size
=
saved_top_size
  def get_variable(self, name, shape, dtype, cast_dtype, *args, **kwargs):
    """Creates/fetches a variable of `dtype`, returned cast to `cast_dtype`."""
    # TODO(reedwm): Currently variables and gradients are transferred to other
    # devices and machines as type `dtype`, not `cast_dtype`. In particular,
    # this means in fp16 mode, variables are transferred as fp32 values, not
    # fp16 values, which uses extra bandwidth.
    var = tf.get_variable(name, shape, dtype, *args, **kwargs)
    return tf.cast(var, cast_dtype)
  def _conv2d_impl(self, input_layer, num_channels_in, filters, kernel_size,
                   strides, padding, kernel_initializer):
    """Dispatches a 2-D convolution to tf.layers or raw tf.nn.conv2d."""
    if self.use_tf_layers:
      return conv_layers.conv2d(input_layer, filters, kernel_size, strides,
                                padding, self.channel_pos,
                                kernel_initializer=kernel_initializer,
                                use_bias=False)
    else:
      weights_shape = [kernel_size[0], kernel_size[1], num_channels_in, filters]
      # We use the name 'conv2d/kernel' so the variable has the same name as
      # its tf.layers equivalent. This way, if a checkpoint is written when
      # self.use_tf_layers == True, it can be loaded when
      # self.use_tf_layers == False, and vice versa.
      weights = self.get_variable('conv2d/kernel', weights_shape,
                                  self.variable_dtype, self.dtype,
                                  initializer=kernel_initializer)
      # tf.nn.conv2d needs 4-element strides matching the data layout.
      if self.data_format == 'NHWC':
        strides = [1] + strides + [1]
      else:
        strides = [1, 1] + strides
      return tf.nn.conv2d(input_layer, weights, strides, padding,
                          data_format=self.data_format)
  def conv(self,
           num_out_channels,
           k_height,
           k_width,
           d_height=1,
           d_width=1,
           mode='SAME',
           input_layer=None,
           num_channels_in=None,
           use_batch_norm=None,
           stddev=None,
           activation='relu',
           bias=0.0,
           kernel_initializer=None):
    """Construct a conv2d layer on top of cnn.

    Args:
      num_out_channels: number of output filters.
      k_height, k_width: kernel size.
      d_height, d_width: strides.
      mode: padding mode; 'SAME_RESNET' selects explicit symmetric padding
        used by ResNet models.
      input_layer: input tensor; defaults to the current top layer.
      num_channels_in: input channel count; defaults to current top size.
      use_batch_norm: whether to apply batch norm; defaults to
        self.use_batch_norm.
      stddev: if set (and no kernel_initializer), use a truncated normal
        initializer with this stddev.
      activation: 'relu', 'tanh', 'linear' or None.
      bias: bias initial value, or None for no bias.
      kernel_initializer: optional explicit kernel initializer.

    Returns:
      The activated output tensor (also stored as the new top layer).

    Raises:
      KeyError: on an unrecognized activation type.
    """
    if input_layer is None:
      input_layer = self.top_layer
    if num_channels_in is None:
      num_channels_in = self.top_size
    if stddev is not None and kernel_initializer is None:
      kernel_initializer = tf.truncated_normal_initializer(stddev=stddev)
    if kernel_initializer is None:
      kernel_initializer = tf.variance_scaling_initializer()
    name = 'conv' + str(self.counts['conv'])
    self.counts['conv'] += 1
    with tf.variable_scope(name):
      strides = [1, d_height, d_width, 1]
      if self.data_format == 'NCHW':
        strides = [strides[0], strides[3], strides[1], strides[2]]
      if mode != 'SAME_RESNET':
        conv = self._conv2d_impl(input_layer, num_channels_in,
                                 num_out_channels,
                                 kernel_size=[k_height, k_width],
                                 strides=[d_height, d_width], padding=mode,
                                 kernel_initializer=kernel_initializer)
      else:
        # Special padding mode for ResNet models
        if d_height == 1 and d_width == 1:
          conv = self._conv2d_impl(input_layer, num_channels_in,
                                   num_out_channels,
                                   kernel_size=[k_height, k_width],
                                   strides=[d_height, d_width],
                                   padding='SAME',
                                   kernel_initializer=kernel_initializer)
        else:
          # Explicitly pad so that strided convs match ResNet's reference
          # padding, then convolve with VALID padding.
          rate = 1  # Unused (for 'a trous' convolutions)
          kernel_height_effective = k_height + (k_height - 1) * (rate - 1)
          pad_h_beg = (kernel_height_effective - 1) // 2
          pad_h_end = kernel_height_effective - 1 - pad_h_beg
          kernel_width_effective = k_width + (k_width - 1) * (rate - 1)
          pad_w_beg = (kernel_width_effective - 1) // 2
          pad_w_end = kernel_width_effective - 1 - pad_w_beg
          padding = [[0, 0], [pad_h_beg, pad_h_end],
                     [pad_w_beg, pad_w_end], [0, 0]]
          if self.data_format == 'NCHW':
            padding = [padding[0], padding[3], padding[1], padding[2]]
          padded_input_layer = tf.pad(input_layer, padding)
          conv = self._conv2d_impl(padded_input_layer, num_channels_in,
                                   num_out_channels,
                                   kernel_size=[k_height, k_width],
                                   strides=[d_height, d_width],
                                   padding='VALID',
                                   kernel_initializer=kernel_initializer)
      if use_batch_norm is None:
        use_batch_norm = self.use_batch_norm
      mlperf.logger.log_conv2d(input_tensor=input_layer, output_tensor=conv,
                               stride_height=d_height, stride_width=d_width,
                               filters=num_out_channels,
                               initializer=kernel_initializer,
                               use_bias=not use_batch_norm and
                               bias is not None)
      if not use_batch_norm:
        if bias is not None:
          biases = self.get_variable('biases', [num_out_channels],
                                     self.variable_dtype, self.dtype,
                                     initializer=tf.constant_initializer(
                                         bias))
          # Reshape back to the conv's static shape, which bias_add may lose.
          biased = tf.reshape(
              tf.nn.bias_add(conv, biases, data_format=self.data_format),
              conv.get_shape())
        else:
          biased = conv
      else:
        # batch_norm() reads the current top layer, so set it first.
        self.top_layer = conv
        self.top_size = num_out_channels
        biased = self.batch_norm(**self.batch_norm_config)
      if activation == 'relu':
        mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
        conv1 = tf.nn.relu(biased)
      elif activation == 'linear' or activation is None:
        conv1 = biased
      elif activation == 'tanh':
        conv1 = tf.nn.tanh(biased)
      else:
        raise KeyError('Invalid activation type \'%s\'' % activation)
      self.top_layer = conv1
      self.top_size = num_out_channels
      return conv1
  def _pool(self, pool_name, pool_function, k_height, k_width, d_height,
            d_width, mode, input_layer, num_channels_in):
    """Construct a pooling layer."""
    if input_layer is None:
      input_layer = self.top_layer
    else:
      self.top_size = num_channels_in
    name = pool_name + str(self.counts[pool_name])
    self.counts[pool_name] += 1
    if self.use_tf_layers:
      pool = pool_function(
          input_layer, [k_height, k_width], [d_height, d_width],
          padding=mode,
          data_format=self.channel_pos,
          name=name)
    else:
      if self.data_format == 'NHWC':
        ksize = [1, k_height, k_width, 1]
        strides = [1, d_height, d_width, 1]
      else:
        ksize = [1, 1, k_height, k_width]
        strides = [1, 1, d_height, d_width]
      # NOTE(review): this branch ignores `pool_function` and always applies
      # max pooling, so average pooling with use_tf_layers=False appears
      # unsupported — confirm before relying on apool() in that mode.
      pool = tf.nn.max_pool(input_layer, ksize, strides, padding=mode,
                            data_format=self.data_format, name=name)
    if pool_name == 'mpool':
      mlperf.logger.log_max_pool(input_tensor=input_layer,
                                 output_tensor=pool)
    self.top_layer = pool
    return pool
def
mpool
(
self
,
k_height
,
k_width
,
d_height
=
2
,
d_width
=
2
,
mode
=
'VALID'
,
input_layer
=
None
,
num_channels_in
=
None
):
"""Construct a max pooling layer."""
return
self
.
_pool
(
'mpool'
,
pooling_layers
.
max_pooling2d
,
k_height
,
k_width
,
d_height
,
d_width
,
mode
,
input_layer
,
num_channels_in
)
def
apool
(
self
,
k_height
,
k_width
,
d_height
=
2
,
d_width
=
2
,
mode
=
'VALID'
,
input_layer
=
None
,
num_channels_in
=
None
):
"""Construct an average pooling layer."""
return
self
.
_pool
(
'apool'
,
pooling_layers
.
average_pooling2d
,
k_height
,
k_width
,
d_height
,
d_width
,
mode
,
input_layer
,
num_channels_in
)
def
reshape
(
self
,
shape
,
input_layer
=
None
):
if
input_layer
is
None
:
input_layer
=
self
.
top_layer
self
.
top_layer
=
tf
.
reshape
(
input_layer
,
shape
)
self
.
top_size
=
shape
[
-
1
]
# HACK This may not always work
return
self
.
top_layer
  def affine(self,
             num_out_channels,
             input_layer=None,
             num_channels_in=None,
             bias=0.0,
             stddev=None,
             activation='relu'):
    """Construct a fully-connected (xw + b) layer on top of cnn.

    Args:
      num_out_channels: number of output units.
      input_layer: input tensor; defaults to the current top layer.
      num_channels_in: input width; defaults to current top size.
      bias: bias initial value.
      stddev: weight-init stddev; defaults to He/Xavier-style scaling.
      activation: 'relu', 'linear' or None.

    Returns:
      The activated output tensor (also stored as the new top layer).

    Raises:
      KeyError: on an unrecognized activation type.
    """
    if input_layer is None:
      input_layer = self.top_layer
    if num_channels_in is None:
      num_channels_in = self.top_size
    name = 'affine' + str(self.counts['affine'])
    self.counts['affine'] += 1
    with tf.variable_scope(name):
      # He init (factor 2) for relu, Xavier-like (factor 1) otherwise.
      init_factor = 2. if activation == 'relu' else 1.
      stddev = stddev or np.sqrt(init_factor / num_channels_in)
      kernel = self.get_variable(
          'weights', [num_channels_in, num_out_channels],
          self.variable_dtype, self.dtype,
          initializer=tf.truncated_normal_initializer(stddev=stddev))
      biases = self.get_variable('biases', [num_out_channels],
                                 self.variable_dtype, self.dtype,
                                 initializer=tf.constant_initializer(bias))
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_DENSE,
                        value=num_out_channels)
      logits = tf.nn.xw_plus_b(input_layer, kernel, biases)
      if activation == 'relu':
        mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
        affine1 = tf.nn.relu(logits, name=name)
      elif activation == 'linear' or activation is None:
        affine1 = logits
      else:
        raise KeyError('Invalid activation type \'%s\'' % activation)
      self.top_layer = affine1
      self.top_size = num_out_channels
      return affine1
  def inception_module(self, name, cols, input_layer=None, in_size=None):
    """Construct an Inception-style module of parallel columns.

    Args:
      name: base name for the module's variable scope.
      cols: list of columns; each column is a list of layer specs, where a
        spec is a tuple of (layer type, *args) with type one of 'conv',
        'mpool', 'apool' or 'share'.
      input_layer: input tensor; defaults to the current top layer.
      in_size: input channel count; defaults to current top size.

    Returns:
      The concatenation of all columns' outputs along the channel axis.

    Raises:
      KeyError: on an unrecognized layer type.
    """
    if input_layer is None:
      input_layer = self.top_layer
    if in_size is None:
      in_size = self.top_size
    name += str(self.counts[name])
    self.counts[name] += 1
    with tf.variable_scope(name):
      col_layers = []
      col_layer_sizes = []
      for c, col in enumerate(cols):
        col_layers.append([])
        col_layer_sizes.append([])
        for l, layer in enumerate(col):
          ltype, args = layer[0], layer[1:]
          # Only the first layer of each column reads the module's input;
          # subsequent layers read the running top layer.
          kwargs = {
              'input_layer': input_layer,
              'num_channels_in': in_size
          } if l == 0 else {}
          if ltype == 'conv':
            self.conv(*args, **kwargs)
          elif ltype == 'mpool':
            self.mpool(*args, **kwargs)
          elif ltype == 'apool':
            self.apool(*args, **kwargs)
          elif ltype == 'share':
            # Share matching layer from previous column
            self.top_layer = col_layers[c - 1][l]
            self.top_size = col_layer_sizes[c - 1][l]
          else:
            raise KeyError(
                'Invalid layer type for inception module: \'%s\'' % ltype)
          col_layers[c].append(self.top_layer)
          col_layer_sizes[c].append(self.top_size)
      # Concatenate the last layer of every column along the channel axis.
      catdim = 3 if self.data_format == 'NHWC' else 1
      self.top_layer = tf.concat([layers[-1] for layers in col_layers],
                                 catdim)
      self.top_size = sum([sizes[-1] for sizes in col_layer_sizes])
      return self.top_layer
def spatial_mean(self, keep_dims=False):
  """Averages the current top layer over its spatial (height/width) axes."""
  layer_index = self.counts['spatial_mean']
  self.counts['spatial_mean'] += 1
  name = 'spatial_mean%d' % layer_index
  # Height/width axes depend on the data layout.
  if self.data_format == 'NHWC':
    spatial_axes = [1, 2]
  else:
    spatial_axes = [2, 3]
  self.top_layer = tf.reduce_mean(
      self.top_layer, spatial_axes, keepdims=keep_dims, name=name)
  return self.top_layer
def dropout(self, keep_prob=0.5, input_layer=None):
  """Adds a dropout layer and makes it the new top layer.

  Args:
    keep_prob: probability of keeping a unit during training. Forced to 1.0
      (no-op) when not in training mode.
    input_layer: input tensor; defaults to the current top layer.

  Returns:
    The dropout output tensor.
  """
  if input_layer is None:
    input_layer = self.top_layer
  else:
    # An explicitly-supplied input invalidates the tracked channel count.
    self.top_size = None
  name = 'dropout' + str(self.counts['dropout'])
  # Bug fix: the counter was never incremented (unlike every other layer
  # type), so all dropout layers shared the name 'dropout0'. Dropout creates
  # no variables, so the rename does not affect checkpoint compatibility.
  self.counts['dropout'] += 1
  with tf.variable_scope(name):
    if not self.phase_train:
      keep_prob = 1.0
    if self.use_tf_layers:
      # tf.layers-style dropout takes a drop rate, not a keep probability.
      dropout = core_layers.dropout(input_layer, 1. - keep_prob,
                                    training=self.phase_train)
    else:
      dropout = tf.nn.dropout(input_layer, keep_prob)
    self.top_layer = dropout
    return dropout
def _batch_norm_without_layers(self, input_layer, decay, use_scale, epsilon):
  """Batch normalization on `input_layer` without tf.layers.

  Args:
    input_layer: tensor to normalize.
    decay: momentum for the moving mean/variance averages.
    use_scale: whether to learn a multiplicative `gamma` term.
    epsilon: small constant added to the variance for numerical stability.

  Returns:
    The normalized tensor.
  """
  # We make this function as similar as possible to the
  # tf.contrib.layers.batch_norm, to minimize the differences between using
  # layers and not using layers.
  shape = input_layer.shape
  # Channel axis is last for NHWC, second for NCHW.
  num_channels = shape[3] if self.data_format == 'NHWC' else shape[1]
  beta = self.get_variable('beta', [num_channels], tf.float32, tf.float32,
                           initializer=tf.zeros_initializer())
  if use_scale:
    gamma = self.get_variable('gamma', [num_channels], tf.float32,
                              tf.float32, initializer=tf.ones_initializer())
  else:
    # Fixed gamma of 1 when no learned scale is requested.
    gamma = tf.constant(1.0, tf.float32, [num_channels])
  # For moving variables, we use tf.get_variable instead of self.get_variable,
  # since self.get_variable returns the result of tf.cast which we cannot
  # assign to.
  moving_mean = tf.get_variable('moving_mean', [num_channels],
                                tf.float32,
                                initializer=tf.zeros_initializer(),
                                trainable=False)
  moving_variance = tf.get_variable('moving_variance', [num_channels],
                                    tf.float32,
                                    initializer=tf.ones_initializer(),
                                    trainable=False)
  if self.phase_train:
    # Training: normalize with batch statistics and register moving-average
    # updates in UPDATE_OPS so they run alongside the train op.
    bn, batch_mean, batch_variance = tf.nn.fused_batch_norm(
        input_layer, gamma, beta, epsilon=epsilon,
        data_format=self.data_format, is_training=True)
    mean_update = moving_averages.assign_moving_average(
        moving_mean, batch_mean, decay=decay, zero_debias=False)
    variance_update = moving_averages.assign_moving_average(
        moving_variance, batch_variance, decay=decay, zero_debias=False)
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, mean_update)
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, variance_update)
  else:
    # Evaluation: normalize with the accumulated moving statistics.
    bn, _, _ = tf.nn.fused_batch_norm(
        input_layer, gamma, beta, mean=moving_mean,
        variance=moving_variance, epsilon=epsilon,
        data_format=self.data_format, is_training=False)
  return bn
def batch_norm(self, input_layer=None, decay=0.999, scale=False,
               epsilon=0.001):
  """Adds a Batch Normalization layer.

  Args:
    input_layer: tensor to normalize; defaults to the current top layer.
    decay: momentum for the moving-average statistics.
    scale: whether to learn a multiplicative `gamma` term.
    epsilon: small constant added to the variance for numerical stability.

  Returns:
    The normalized tensor, which becomes the new top layer.
  """
  if input_layer is None:
    input_layer = self.top_layer
  else:
    # An explicitly-supplied input invalidates the tracked channel count.
    self.top_size = None
  name = 'batchnorm' + str(self.counts['batchnorm'])
  self.counts['batchnorm'] += 1
  center = True
  with tf.variable_scope(name) as scope:
    if self.use_tf_layers:
      layer_obj = normalization_layers.BatchNormalization(
          momentum=decay,
          scale=scale,
          epsilon=epsilon,
          fused=True,
          axis=_data_format_to_channel_axis[self.data_format],
          # We pass this 'scope' argument for compatibility with checkpoints
          # created with the contrib version of batch norm. tf_cnn_benchmarks
          # used to use the contrib version.
          _scope=scope,
          center=center,
          name=scope.name)
      bn = layer_obj.apply(input_layer, training=self.phase_train)
    else:
      bn = self._batch_norm_without_layers(input_layer, decay, scale, epsilon)
  self.top_layer = bn
  # Channel axis is last for NHWC, second for NCHW.
  self.top_size = bn.shape[3] if self.data_format == 'NHWC' else bn.shape[1]
  self.top_size = int(self.top_size)
  mlperf.logger.log_batch_norm(
      input_tensor=input_layer, output_tensor=bn, momentum=decay,
      epsilon=epsilon, center=center, scale=scale, training=self.phase_train)
  return bn
def lrn(self, depth_radius, bias, alpha, beta):
  """Adds a local response normalization layer."""
  index = self.counts['lrn']
  self.counts['lrn'] = index + 1
  layer_name = 'lrn%d' % index
  normalized = tf.nn.lrn(self.top_layer, depth_radius, bias, alpha, beta,
                         name=layer_name)
  self.top_layer = normalized
  return normalized
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/datasets.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark dataset utilities.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
abc
import
abstractmethod
import
os
import
numpy
as
np
import
six
from
six.moves
import
cPickle
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
import
tensorflow.compat.v1
as
tf
from
tensorflow.python.platform
import
gfile
import
preprocessing
# Number of examples in the standard splits of each supported dataset.
IMAGENET_NUM_TRAIN_IMAGES = 1281167
IMAGENET_NUM_VAL_IMAGES = 50000
COCO_NUM_TRAIN_IMAGES = 118287
# NOTE(review): the full COCO 2017 validation split has 5000 images; 4952
# presumably matches the subset this benchmark evaluates -- confirm.
COCO_NUM_VAL_IMAGES = 4952
class Dataset(object):
  """Abstract base class describing a dataset used by the cnn benchmarks."""

  def __init__(self, name, data_dir=None, queue_runner_required=False,
               num_classes=None):
    self.name = name
    self.data_dir = data_dir
    self._queue_runner_required = queue_runner_required
    self._num_classes = num_classes

  def tf_record_pattern(self, subset):
    """Returns the glob pattern matching the TFRecord files of `subset`."""
    pattern = '%s-*-of-*' % subset
    return os.path.join(self.data_dir, pattern)

  def reader(self):
    """Returns a reader for this dataset's record files."""
    return tf.TFRecordReader()

  @property
  def num_classes(self):
    return self._num_classes

  @num_classes.setter
  def num_classes(self, val):
    self._num_classes = val

  @abstractmethod
  def num_examples_per_epoch(self, subset):
    """Returns the example count of `subset`; subclasses must override."""
    pass

  def __str__(self):
    return self.name

  def get_input_preprocessor(self, input_preprocessor='default'):
    """Looks up the preprocessor class registered for this dataset."""
    assert not self.use_synthetic_gpu_inputs()
    return _SUPPORTED_INPUT_PREPROCESSORS[self.name][input_preprocessor]

  def queue_runner_required(self):
    """Whether reading this dataset requires TF queue runners."""
    return self._queue_runner_required

  def use_synthetic_gpu_inputs(self):
    """True when no data directory was given, i.e. inputs are synthetic."""
    return not self.data_dir
class LibrispeechDataset(Dataset):
  """Configuration for LibriSpeech dataset."""

  def __init__(self, data_dir=None):
    super(LibrispeechDataset, self).__init__('librispeech', data_dir,
                                             num_classes=29)

  def tf_record_pattern(self, subset):
    """Returns the record glob for `subset`, or '' if the subset is unknown."""
    subset_files = {
        'train': 'train-clean-*.tfrecords',
        'validation': 'test-clean.tfrecords',
    }
    if subset in subset_files:
      return os.path.join(self.data_dir, subset_files[subset])
    return ''

  def num_examples_per_epoch(self, subset='train'):
    """Returns a placeholder epoch size (same for every subset)."""
    del subset
    return 2  # TODO(laigd): currently this is an arbitrary number.
class ImageDataset(Dataset):
  """Abstract base class for datasets made of images."""

  def __init__(self,
               name,
               height,
               width,
               depth=None,
               data_dir=None,
               queue_runner_required=False,
               num_classes=1001):
    super(ImageDataset, self).__init__(name, data_dir, queue_runner_required,
                                       num_classes)
    self.height = height
    self.width = width
    # Default to 3 channels (RGB) when depth is unspecified or falsy.
    self.depth = depth if depth else 3
class ImagenetDataset(ImageDataset):
  """Configuration for Imagenet dataset."""

  def __init__(self, data_dir=None):
    super(ImagenetDataset, self).__init__('imagenet', 300, 300,
                                          data_dir=data_dir)

  def num_examples_per_epoch(self, subset='train'):
    """Returns the number of images in `subset`."""
    subset_sizes = {
        'train': IMAGENET_NUM_TRAIN_IMAGES,
        'validation': IMAGENET_NUM_VAL_IMAGES,
    }
    if subset not in subset_sizes:
      raise ValueError('Invalid data subset "%s"' % subset)
    return subset_sizes[subset]
class Cifar10Dataset(ImageDataset):
  """Configuration for the cifar 10 dataset.

  All input images are loaded into memory up front.
  """

  def __init__(self, data_dir=None):
    super(Cifar10Dataset, self).__init__('cifar10', 32, 32,
                                         data_dir=data_dir,
                                         queue_runner_required=True,
                                         num_classes=11)

  def read_data_files(self, subset='train'):
    """Reads from data file and returns images and labels in a numpy array."""
    assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
                           'data')
    if subset == 'train':
      batch_names = ['data_batch_%d' % i for i in xrange(1, 6)]
    elif subset == 'validation':
      batch_names = ['test_batch']
    else:
      raise ValueError('Invalid data subset "%s"' % subset)

    loaded_batches = []
    for batch_name in batch_names:
      path = os.path.join(self.data_dir, batch_name)
      with gfile.Open(path, 'rb') as f:
        # python2 does not have the encoding parameter
        load_kwargs = {} if six.PY2 else {'encoding': 'bytes'}
        loaded_batches.append(cPickle.load(f, **load_kwargs))
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    all_images = np.concatenate(
        [batch[b'data'] for batch in loaded_batches]).astype(np.float32)
    all_labels = np.concatenate(
        [batch[b'labels'] for batch in loaded_batches])
    return all_images, all_labels

  def num_examples_per_epoch(self, subset='train'):
    """Returns the number of images in `subset`."""
    if subset == 'train':
      return 50000
    if subset == 'validation':
      return 10000
    raise ValueError('Invalid data subset "%s"' % subset)
class COCODataset(ImageDataset):
  """Configuration for COCO dataset."""

  def __init__(self, data_dir=None, image_size=300):
    super(COCODataset, self).__init__('coco', image_size, image_size,
                                      data_dir=data_dir, num_classes=81)

  def num_examples_per_epoch(self, subset='train'):
    """Returns the number of images in `subset`."""
    if subset == 'train':
      return COCO_NUM_TRAIN_IMAGES
    if subset == 'validation':
      return COCO_NUM_VAL_IMAGES
    raise ValueError('Invalid data subset "%s"' % subset)
# Maps each supported dataset name to the Dataset subclass implementing it.
_SUPPORTED_DATASETS = {
    'imagenet': ImagenetDataset,
    'cifar10': Cifar10Dataset,
    'librispeech': LibrispeechDataset,
    'coco': COCODataset,
}
# Maps dataset name -> {preprocessor name: preprocessor class}. Looked up by
# Dataset.get_input_preprocessor(); 'default' must exist for every dataset.
_SUPPORTED_INPUT_PREPROCESSORS = {
    'imagenet': {
        'default': preprocessing.RecordInputImagePreprocessor,
        'official_models_imagenet': preprocessing.ImagenetPreprocessor,
    },
    'cifar10': {
        'default': preprocessing.Cifar10ImagePreprocessor
    },
    'librispeech': {
        'default': preprocessing.LibrispeechPreprocessor
    },
    'coco': {
        'default': preprocessing.COCOPreprocessor
    },
}
def create_dataset(data_dir, data_name):
  """Create a Dataset instance based on data_dir and data_name.

  Args:
    data_dir: directory holding the dataset files, or None/'' to use
      synthetic data.
    data_name: name of the dataset ('imagenet', 'cifar10', ...). If None, the
      name is inferred from `data_dir`.

  Returns:
    A Dataset instance for `data_name`, constructed with `data_dir`.

  Raises:
    ValueError: if the dataset name is unknown or cannot be inferred.
  """
  if not data_dir and not data_name:
    # When using synthetic data, use synthetic imagenet images by default.
    data_name = 'imagenet'

  # Infer dataset name from data_dir if data_name is not provided.
  if data_name is None:
    # A dataset is recognized when its name appears as a substring of the
    # data directory path.
    for supported_name in _SUPPORTED_DATASETS:
      if supported_name in data_dir:
        data_name = supported_name
        break
    else:
      # Failed to identify dataset name from data dir.
      raise ValueError('Could not identify name of dataset. '
                       'Please specify with --data_name option.')
  if data_name not in _SUPPORTED_DATASETS:
    raise ValueError('Unknown dataset. Must be one of %s' %
                     ', '.join(sorted(_SUPPORTED_DATASETS)))
  return _SUPPORTED_DATASETS[data_name](data_dir)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/flags.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions to define flags and params.
Calling a DEFINE_* function will add a ParamSpec namedtuple to the param_spec
dict. The DEFINE_* arguments match those in absl. Calling define_flags() creates
a command-line flag for every ParamSpec defined by a DEFINE_* functions.
The reason we don't use absl flags directly is that we want to be able to use
tf_cnn_benchmarks as a library. When using it as a library, we don't want to
define any flags, but instead pass parameters to the BenchmarkCNN constructor.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
namedtuple
from
absl
import
flags
as
absl_flags
import
six
# The shared absl FLAGS object; actual flags are created by define_flags().
FLAGS = absl_flags.FLAGS

# ParamSpec describes one of benchmark_cnn.BenchmarkCNN's parameters.
ParamSpec = namedtuple('_ParamSpec',
                       ['flag_type', 'default_value', 'description', 'kwargs'])

# Maps from parameter name to its ParamSpec.
param_specs = {}
def DEFINE_string(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  """Registers a string-valued parameter spec under `name`."""
  spec = ParamSpec('string', default, help, {})
  param_specs[name] = spec
def DEFINE_boolean(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  """Registers a boolean-valued parameter spec under `name`."""
  spec = ParamSpec('boolean', default, help, {})
  param_specs[name] = spec
def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None):
  # pylint: disable=invalid-name,redefined-builtin
  """Registers an integer-valued parameter spec with optional bounds."""
  bounds = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
  param_specs[name] = ParamSpec('integer', default, help, bounds)
def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None):
  # pylint: disable=invalid-name,redefined-builtin
  """Registers a float-valued parameter spec with optional bounds."""
  bounds = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
  param_specs[name] = ParamSpec('float', default, help, bounds)
def DEFINE_enum(name, default, enum_values, help):  # pylint: disable=invalid-name,redefined-builtin
  """Registers an enum-valued parameter spec restricted to `enum_values`."""
  spec = ParamSpec('enum', default, help, {'enum_values': enum_values})
  param_specs[name] = spec
def DEFINE_list(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  """Registers a list-valued parameter spec under `name`."""
  spec = ParamSpec('list', default, help, {})
  param_specs[name] = spec
def define_flags(specs=None):
  """Define a command line flag for each ParamSpec in flags.param_specs."""
  specs = specs or param_specs
  # Dispatch table from ParamSpec.flag_type to the matching absl definer.
  flag_definers = {
      'boolean': absl_flags.DEFINE_boolean,
      'float': absl_flags.DEFINE_float,
      'integer': absl_flags.DEFINE_integer,
      'string': absl_flags.DEFINE_string,
      'enum': absl_flags.DEFINE_enum,
      'list': absl_flags.DEFINE_list
  }
  for flag_name, spec in six.iteritems(specs):
    if spec.flag_type not in flag_definers:
      raise ValueError('Unknown flag_type %s' % spec.flag_type)
    definer = flag_definers[spec.flag_type]
    definer(flag_name, spec.default_value, help=spec.description,
            **spec.kwargs)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/leading_indicators_test.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark various leading indicators CNNs.
The purpose of these tests is to test each model as a high level baseline and
to ensure the various variable_update options have not regressing. Not all
options are tested. The tests focus on the most viable options.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
ctypes
import
logging
import
os
import
sys
from
absl
import
flags
from
absl.testing
import
absltest
# pylint: disable=unused-import
import
tensorflow.compat.v1
as
tf
# pylint: disable=g-bad-import-order
import
benchmark_cnn
from
platforms
import
util
as
platforms_util
# Command-line override for the number of measured (post-warmup) batches;
# consumed by BenchmarkBase._shared_params() when set.
flags.DEFINE_integer('num_batches', None,
                     'number of batches to run, excluding warmup')
class BenchmarkBase(tf.test.Benchmark):
  """Base class for all benchmarks in this file."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Base class for all benchmarks in this file.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor.
    """
    # Load default values if the benchmark is not run with absl.app.run()
    if not flags.FLAGS.is_parsed():
      flags.FLAGS.mark_as_parsed()
    self.fake_data_dir = os.path.join(platforms_util.get_test_data_dir(),
                                      'fake_tf_record_data')
    self.output_dir = output_dir
    if root_data_dir is None:
      # Fall back to the internal default imagenet location.
      self.data_dir = ('/readahead/200M/placer/prod/home/distbelief/'
                       'imagenet-tensorflow/imagenet-2012-tfrecord')
    else:
      self.data_dir = os.path.join(root_data_dir, 'imagenet')

  def _run_benchmark(self, params):
    """Run a CNN benchmark and report its results.

    Args:
      params: Params tuple, typically created by benchmark_cnn.make_params or
        benchmark_cnn.make_params_from_flags.
    """
    logging.info('Running benchmark [%s]', self._get_name())
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.print_info()
    stats = bench.run()
    # Copy the interesting stats into the `extras` dict reported upstream;
    # accuracy/loss keys are only present for some benchmark configurations.
    extras = {}
    extras['examples_per_sec'] = stats.get('images_per_sec')
    if 'last_average_loss' in stats:
      extras['last_average_loss'] = stats['last_average_loss']
    if 'top_1_accuracy' in stats:
      extras['top_1_accuracy'] = stats['top_1_accuracy']
    if 'top_5_accuracy' in stats:
      extras['top_5_accuracy'] = stats['top_5_accuracy']
    self.report_benchmark(
        iters=stats.get('num_steps'),
        wall_time=stats.get('average_wall_time'),
        extras=extras)

  def _shared_params(self):
    """Returns shared parameters for all benchmarks in this file."""
    params = {}
    if flags.FLAGS.num_batches is not None:
      params['num_batches'] = flags.FLAGS.num_batches
    if self.output_dir is not None:
      params['benchmark_log_dir'] = self.output_dir
    return benchmark_cnn.make_params(**params)

  def _binary_search_batch_size(self, params, init_batch_size):
    """Find the max batch_size using binary search."""
    assert init_batch_size > 0
    low_batch_size = 0
    high_batch_size = None
    batch_size = init_batch_size

    # No need to run a warmup or many batches; if it doesn't OOM after 10
    # batches, it should work in general.
    params = params._replace(num_batches=10, num_warmup_batches=0)

    # Find high_batch_size first: double the batch size until an OOM
    # (ResourceExhaustedError) establishes an upper bound.
    tf.logging.info('Looking for upper bound to batch size, starting with %d'
                    % batch_size)
    while high_batch_size is None:
      tf.logging.info('Trying batch_size %d' % batch_size)
      params = params._replace(batch_size=batch_size)
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench.print_info()
      try:
        bench.run()
        low_batch_size = batch_size
        batch_size *= 2
      except tf.errors.ResourceExhaustedError:
        high_batch_size = batch_size - 1

    # Binary Search
    tf.logging.info(
        'Max batch size is in range (%d, %d]. Starting binary search to find '
        'exact max batch size.' % (low_batch_size, batch_size))
    while low_batch_size < high_batch_size:
      # Round up so the loop terminates when the bounds are adjacent.
      batch_size = (low_batch_size + high_batch_size + 1) // 2
      tf.logging.info('Trying batch_size %d' % batch_size)
      params = params._replace(batch_size=batch_size)
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench.print_info()
      try:
        bench.run()
        low_batch_size = batch_size
      except tf.errors.ResourceExhaustedError:
        high_batch_size = batch_size - 1

    self.report_benchmark(extras={'max_batch_size': low_batch_size})
class Resnet50BenchmarksInferenceCpu(BenchmarkBase):
  """Benchmarks for ResNet50 inference on CPU."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        num_gpus=1, model='resnet50', num_warmup_batches=5, num_batches=50,
        distortions=False, forward_only=True, device='cpu',
        data_format='NHWC', num_intra_threads=0)

  def benchmark_synth_forward_batch1(self):
    """Tests 1 CPU batch size 1."""
    self._run_benchmark(self._shared_params()._replace(batch_size=1))

  def benchmark_synth_forward_batch16(self):
    """Tests 1 CPU batch size 16."""
    self._run_benchmark(self._shared_params()._replace(batch_size=16))
class FrozenResnet50BenchmarksInferenceCpu(Resnet50BenchmarksInferenceCpu):
  """Benchmarks for ResNet50 frozen graph inference on CPU."""

  def _shared_params(self):
    base_params = super(FrozenResnet50BenchmarksInferenceCpu,
                        self)._shared_params()
    return base_params._replace(freeze_when_forward_only=True)
class Resnet50BenchmarksInference(BenchmarkBase):
  """Benchmarks for ResNet50 inference."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        num_gpus=1, model='resnet50', distortions=False, forward_only=True)

  def benchmark_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128."""
    self._run_benchmark(self._shared_params()._replace(batch_size=128))

  def benchmark_fp16_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 FP16."""
    self._run_benchmark(
        self._shared_params()._replace(batch_size=128, use_fp16=True))

  def benchmark_fp16_synth_forward_batch16(self):
    """Tests 1 GPU batch size 16 FP16."""
    self._run_benchmark(
        self._shared_params()._replace(batch_size=16, use_fp16=True))

  def benchmark_xla_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 with XLA."""
    self._run_benchmark(
        self._shared_params()._replace(batch_size=128, xla=True))

  def benchmark_fp16_xla_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 FP16 with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        batch_size=128, use_fp16=True, xla=True))

  def benchmark_fp16_xla_synth_forward_batch16(self):
    """Tests 1 GPU batch size 16 FP16 with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        batch_size=16, use_fp16=True, xla=True))
class FrozenResnet50BenchmarksInference(Resnet50BenchmarksInference):
  """Benchmarks for ResNet50 frozen graph inference."""

  def _shared_params(self):
    parent_params = super(FrozenResnet50BenchmarksInference,
                          self)._shared_params()
    return parent_params._replace(freeze_when_forward_only=True)

  def benchmark_trt_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128."""
    self._run_benchmark(
        self._shared_params()._replace(batch_size=128, trt_mode='FP32'))

  # TODO(laigd): enable fp16 tests for TF-TRT, it's currently not supported yet.
  # def benchmark_fp16_trt_synth_forward_batch128(self):
  #   """Tests 1 GPU batch size 128 FP16."""
  #   params = self._shared_params()._replace(
  #       batch_size=128, use_fp16=True, trt_mode='FP16')
  #   self._run_benchmark(params)

  # Test with batch size 16 to compare with native TF GPU implementation and
  # XLA.
  # def benchmark_fp16_trt_synth_forward_batch16(self):
  #   """Tests 1 GPU batch size 16 FP16."""
  #   params = self._shared_params()._replace(
  #       batch_size=16, use_fp16=True, trt_mode='FP16')
  #   self._run_benchmark(params)
class Resnet50Benchmarks(BenchmarkBase):
  """Benchmark resnet50 configurations."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50', batch_size=128, distortions=False,
        optimizer='momentum')

  def _shared_params_fp16(self):
    """Returns shared parameters for all ResNet50 FP16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50', batch_size=256, distortions=False, use_fp16=True,
        optimizer='momentum', loss_type_to_report='base_loss',
        compute_lr_on_cpu=True, single_l2_loss_op=True)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    self._run_benchmark(self._shared_params()._replace(num_gpus=1))

  def benchmark_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet'))

  def benchmark_synth_1gpu_max_batch_size(self):
    """Finds largest batch size that can be run with 1 gpu using synth data."""
    self._binary_search_batch_size(
        self._shared_params()._replace(num_gpus=1,
                                       variable_update='parameter_server'),
        init_batch_size=128)

  def benchmark_synth_4gpu_gpureplicated(self):
    """Tests 4 gpu with synthetic data with parameters replicated."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=4, variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=8, variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=8, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  # FP16 mixed-precision tests.
  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on the gpu."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, variable_update='parameter_server'))

  def benchmark_fp16_synth_1gpu_gpuparams_batch128(self):
    """Tests 1 gpu with synthetic data with parameters on the gpu."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, variable_update='parameter_server'))

  def benchmark_fp16_synth_4gpu_gpureplicated(self):
    """Tests 4 gpu with synthetic data with nccl and all_reduce."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=4, variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_fp16_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic with nccl and all_reduce."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=8, variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_fp16_fake_1gpu_gpuparams(self):
    """Tests 1 gpus with fake data."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server'))

  def benchmark_fp16_fake_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake data."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=8, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_fp16_fakedistort_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake distorted data."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=8, data_dir=self.fake_data_dir, data_name='imagenet',
        distortions=True, variable_update='replicated',
        all_reduce_spec='nccl', gradient_repacking=2))

  # XLA versions of Resnet50 tests only for single GPU.
  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True))

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data with XLA."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True))

  # Test does not run as part of continuous testing on guitar.
  def benchmark_ng_xla_batch64_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with XLA, synth data, and batch 64."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, batch_size=64, variable_update='parameter_server',
        xla=True))

  def benchmark_fp16_xla_batch64_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA, synth data, and batch 64."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=64, variable_update='parameter_server',
        xla=True))

  def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA, and synth data."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, variable_update='parameter_server',
        xla=True))

  def benchmark_xla_synth_1gpu_max_batch_size(self):
    """Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
    self._binary_search_batch_size(
        self._shared_params()._replace(num_gpus=1,
                                       variable_update='parameter_server',
                                       xla=True),
        init_batch_size=128)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True))

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True))

  # Test does not run as part of continuous testing.
  def benchmark_xla_fakedistort_1gpu_gpuparams(self):
    """Tests 1 gpu with fake distorted data with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        distortions=True, variable_update='parameter_server', xla=True))
class Resnet50v15Benchmarks(BenchmarkBase):
  """Benchmark various ResNet50V1.5 configurations.

  ResNetV1.5 differs from V1 in stride 2 is used in the first 3x3 convolution
  of each block instead of the first 1x1 convolution.
  """

  def _shared_params_fp16(self):
    """Returns shared parameters for all ResNet50v1.5 FP16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50_v1.5',
        batch_size=256,
        distortions=False,
        use_fp16=True,
        optimizer='momentum',
        loss_type_to_report='base_loss',
        compute_lr_on_cpu=True,
        single_l2_loss_op=True)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params_fp16()._replace(num_gpus=1)
    self._run_benchmark(params)

  def benchmark_fp16_batch256_synth_8gpu_gpuparams(self):
    """Tests 8 gpus with synthetic data at batch 256."""
    params = self._shared_params_fp16()._replace(num_gpus=8)
    self._run_benchmark(params)

  def benchmark_fp16_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data at batch 128 (useful for small GPUs)."""
    params = self._shared_params_fp16()._replace(num_gpus=1, batch_size=128)
    self._run_benchmark(params)

  def benchmark_fp16_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fp16_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of Resnet50v1.5 tests.
  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(num_gpus=1, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, batch128, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA compile."""
    params = self._shared_params_fp16()._replace(num_gpus=1, xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, batch 128, synthetic data and XLA compile."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, num_batches=200, batch_size=128, xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch256_synth_8gpu_gpuparams(self):
    """Tests 8 gpu with synthetic data and xla autojit."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8, num_batches=200, batch_size=256, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data and XLA compile."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu, synthetic data, replicated parameters, XLA compile."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu, synthetic data, replicated parameters, XLA autojit."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu, fake data, replicated parameters, XLA compile."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla_compile=True)
    self._run_benchmark(params)
class Vgg16Benchmarks(BenchmarkBase):
  """Benchmark various vgg16 configurations."""

  def _shared_params(self):
    """Returns shared parameters for all vgg16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='vgg16', batch_size=128, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16 and synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        all_reduce_spec='nccl',
        variable_update='replicated',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of VGG16 tests only for single GPU.
  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data, and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.data_dir,
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)
class TrivialBenchmarks(BenchmarkBase):
  """Benchmarks for trivial model.

  The purpose of these tests is to verify the upper bound for the input
  pipeline. Fake data creates an upper bound on the input pipeline throughput.
  """

  def _shared_params(self):
    """Returns shared parameters for all trivial benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='trivial',
        num_gpus=8,
        distortions=False,
        variable_update='independent',
        data_dir=self.fake_data_dir)

  def benchmark_fake_64batch(self):
    """Tests fake data at batch size 64."""
    params = self._shared_params()._replace(
        batch_size=64, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fake_128batch(self):
    """Tests fake data at batch size 128."""
    params = self._shared_params()._replace(
        batch_size=128, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fake_256batch(self):
    """Tests fake data at batch size 256."""
    params = self._shared_params()._replace(
        batch_size=256, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fakedistort_128batch(self):
    """Tests fake distorted data at batch size 128."""
    params = self._shared_params()._replace(
        batch_size=128, data_name='imagenet', distortions=True)
    self._run_benchmark(params)
class AlexnetBenchmarks(BenchmarkBase):
  """Benchmarks for alexnet."""

  def _shared_params(self):
    """Returns shared parameters for all alexnet benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='alexnet', batch_size=512, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16 and synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpus with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fake_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA Benchmark tests for AlexNet.
  def benchmark_xla_synth_1gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpuparams(self):
    """Tests 1 gpu with fake data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpuparams(self):
    """Tests 1 gpu with real data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.data_dir,
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)
class InceptionV3Benchmarks(BenchmarkBase):
  """Benchmark for InceptionV3."""

  def _shared_params(self):
    """Returns shared parameters for all InceptionV3 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='inception3', batch_size=64, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16 and synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_1gpu_max_batch_size(self):
    """Finds largest batch size that can be run with 1 gpu using synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._binary_search_batch_size(params, init_batch_size=128)

  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA and synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_max_batch_size(self):
    """Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._binary_search_batch_size(params, init_batch_size=128)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.data_dir,
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)
class NcfBenchmarks(BenchmarkBase):
  """Benchmarks for neural collaborative filtering."""

  def _shared_params(self):
    """Returns shared parameters for all NCF benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='ncf', batch_size=64 * 1024, num_gpus=1, num_warmup_batches=1)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, parameter-server variables."""
    self._run_benchmark(
        self._shared_params()._replace(variable_update='parameter_server'))

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, fp16, synthetic data."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', use_fp16=True))

  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, XLA autojit."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla=True))

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, fp16, synthetic data, XLA autojit."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla=True, use_fp16=True))

  def benchmark_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, XLA compile."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True))

  def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, fp16, synthetic data, XLA compile."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True, use_fp16=True))
class DeepSpeech2Benchmarks(BenchmarkBase):
  """Benchmarks for DeepSpeech2 model."""

  def _shared_params(self):
    """Returns shared parameters for all DeepSpeech2 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='deepspeech2', batch_size=32, num_gpus=1,
        data_name='librispeech')

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, parameter-server variables."""
    self._run_benchmark(
        self._shared_params()._replace(variable_update='parameter_server'))

  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, XLA autojit."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla=True))

  def benchmark_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, XLA compile."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True))
class SsdBenchmarks(BenchmarkBase):
  """Benchmarks for SSD model."""

  def _cudnn_version(self):
    """Returns the loaded cuDNN version as an int, or None if undetectable."""
    if sys.platform == 'win32':
      # ctypes-based symbol probing below is not done on Windows.
      return None
    # LoadLibrary(None) returns a handle to the current process; cuDNN
    # symbols are visible through it when the process linked against cuDNN.
    lib = ctypes.cdll.LoadLibrary(None)
    if hasattr(lib, 'cudnnGetErrorString'):
      version = lib.cudnnGetVersion()
      return version
    return None

  def _shared_params(self):
    """Returns shared parameters for all SSD benchmarks.

    Raises:
      RuntimeError: If the cuDNN version is unknown or below 7.3
        (versions are encoded as ints, e.g. 7300 for 7.3.0).
    """
    cudnn_version = self._cudnn_version()
    if cudnn_version is None or cudnn_version < 7300:
      raise RuntimeError(
          'Needs at least cuDNN 7.3 to work with fp16 (b/112048183). '
          'Build with --define=use_experimental_cudnn=1')
    return BenchmarkBase._shared_params(self)._replace(
        # TODO(b/115672206): Replace backbone model and data dir with replicated
        # placer location for better performance.
        backbone_model_path=platforms_util.get_ssd_backborn_model_file(),  # pylint: disable=line-too-long
        data_dir=platforms_util.get_ssd_backboard_data_dir(),
        batch_size=128,
        data_name='coco',
        model='ssd300',
        num_batches=10,
        num_warmup_batches=1,
        num_gpus=1,
        optimizer='momentum',
        momentum=0.9,
        weight_decay=5e-4,
        loss_type_to_report='base_loss',
        single_l2_loss_op=True,
        compute_lr_on_cpu=True,
    )

  def benchmark_xla_compile_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data and XLA compile."""
    params = self._shared_params()._replace(
        num_gpus=1,
        xla_compile=True,
    )
    self._run_benchmark(params)

  def benchmark_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data."""
    params = self._shared_params()._replace(
        num_gpus=1,)
    self._run_benchmark(params)

  def benchmark_xla_compile_fp16_real_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, real data and XLA compile."""
    params = self._shared_params()._replace(
        num_gpus=1, xla_compile=True, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_fp16_real_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16 and real data."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_compile_real_8gpu_gpuparams(self):
    """Tests 8 gpus with real data, replicated variables and XLA compile."""
    params = self._shared_params()._replace(
        num_gpus=8,
        xla_compile=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_real_8gpu_gpuparams(self):
    """Tests 8 gpus with real data and replicated variables."""
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_xla_compile_fp16_real_8gpu_gpuparams(self):
    """Tests 8 gpus with fp16, real data, replicated vars and XLA compile."""
    params = self._shared_params()._replace(
        num_gpus=8,
        xla_compile=True,
        use_fp16=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_fp16_real_8gpu_gpuparams(self):
    """Tests 8 gpus with fp16, real data and replicated variables."""
    params = self._shared_params()._replace(
        num_gpus=8,
        use_fp16=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)
if __name__ == '__main__':
  # These benchmarks are written against TF1 graph-mode APIs.
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions related to MLPerf compliance.
MLPerf requires submissions to log what the benchmark does, in order to verify
that the benchmark meets the MLPerf requirements. This module contains a global
object `logger` that is used by other files to log what tf_cnn_benchmarks does
for compliance.
By default, `logger` does nothing, as the MLPerf compliance logs are verbose and
unnecessary if one is not concerned about MLPerf compliance. The logger can be
enabled by using the `mlperf_logger` context manager.
To enable the logger with `mlperf_logger`, the MLPerf compliance library at
https://github.com/mlperf/training/tree/master/compliance is required. If
the logger is not enabled, the library is not needed.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
namedtuple
import
contextlib
import
os
import
sys
import
tensorflow.compat.v1
as
tf
# pylint: disable=g-import-not-at-top
try:
  # Not all users have the MLPerf compliance library, so we don't want to
  # unconditionally crash if these imports fail.
  from mlperf_compliance import mlperf_log
  from mlperf_compliance import resnet_log_helper
  from mlperf_compliance import tags
  import_successful = True
except ImportError:
  # The logger cannot be enabled in this case since the MLPerf library isn't
  # found. We return empty strings from the `tags` attribute so that
  # the benchmark can still run without crashing. These empty tags are passed
  # to an instance of `NullMlPerfLogger`, which does not log anything and
  # ignores the tag values.
  class _Tags(object):
    """Stand-in for `mlperf_compliance.tags` when the library is missing."""

    def __getattr__(self, item):
      # Every tag constant (e.g. `tags.RESNET`) resolves to the empty string.
      return ''
  tags = _Tags()
  import_successful = False
# pylint: enable=g-import-not-at-top
# Per-model bundle: the compliance print function, the set of tags valid for
# that model, and the model's MLPerf name (used in log lines).
_ModelInfo = namedtuple('_ModelInfo',
                       ['print_fn', 'tag_set', 'mlperf_model_name'])


# Prefix emitted on every MLPerf v0.5.0 compliance log line.
_MLPERF_LOG_PREFIX = ':::MLPv0.5.0'
class MlPerfLogger(object):
  """Logs various aspects about a benchmark run for MLPerf compliance."""

  def __init__(self, model):
    """Creates a logger for `model`.

    Args:
      model: Model name; must be 'resnet50_v1.5' or 'ssd300'.

    Raises:
      ValueError: If `model` is not a supported MLPerf model.
    """
    self._root_dir = os.path.split(os.path.abspath(__file__))[0]
    # NOTE(review): the compliance library appears to use these ROOT_DIR_*
    # globals (and get_caller below) to report call sites relative to this
    # directory — confirm against mlperf_compliance.
    mlperf_log.ROOT_DIR_RESNET = self._root_dir
    mlperf_log.ROOT_DIR_SSD = self._root_dir
    self.model = model
    model_to_info = {
        'resnet50_v1.5': _ModelInfo(mlperf_log.resnet_print,
                                    mlperf_log.RESNET_TAG_SET, tags.RESNET),
        'ssd300': _ModelInfo(mlperf_log.ssd_print, mlperf_log.SSD_TAG_SET,
                             tags.SSD)
    }

    try:
      self._log_fn, self.tag_set, self.mlperf_model_name = model_to_info[model]
    except KeyError:
      raise ValueError('--ml_perf_compliance_logging is only compatible when '
                       '--model is one of the following: ' +
                       ', '.join(model_to_info.keys()))

  def log(self, key, value=None, stack_offset=2):
    """Logs `key`/`value` if `key` is in this model's tag set.

    Keys outside the tag set are reported on stdout and otherwise ignored.
    """
    if key in self.tag_set:
      self._log_fn(key, value, stack_offset)
    else:
      print('Ignoring MLPerf logging item key=%s, value=%s for model %s' %
            (key, value, self.model))

  def log_deferred_tensor_value(self, key, tensor_value, global_step,
                                stack_offset=2, every_n=1):
    """Logs the value of a tensor when the graph is run."""
    caller = '(%s)' % mlperf_log.get_caller(stack_offset, self._root_dir)

    def create_print_op():
      # Emits a compliance-formatted line marked "deferred" because the value
      # is only known when the graph executes.
      return tf.print(_MLPERF_LOG_PREFIX, self.mlperf_model_name,
                      tf.timestamp(), caller, key,
                      ': { "deferred": true, "value":', tensor_value,
                      '}', output_stream=sys.stdout)

    # Only print on steps where global_step is a multiple of every_n.
    maybe_print = tf.cond(tf.equal(global_step % every_n, 0),
                          create_print_op, tf.no_op)
    with tf.control_dependencies([maybe_print]):
      # Return a tensor that carries the same value but depends on the print,
      # so using it forces the log to happen.
      return tf.identity(tensor_value)

  def log_max_pool(self, input_tensor, output_tensor):
    # Only ResNet50v1.5 logs max-pool layers for compliance.
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_max_pool(input_tensor, output_tensor)

  def log_begin_block(self, input_tensor, block_type):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_begin_block(input_tensor, block_type)

  def log_end_block(self, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_end_block(output_tensor)

  def log_projection(self, input_tensor, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_projection(input_tensor, output_tensor)

  def log_conv2d(self, input_tensor, output_tensor, stride_height,
                 stride_width, filters, initializer, use_bias):
    """Log a conv2d call."""
    if self.model == 'resnet50_v1.5':
      # The compliance helper takes a single stride, so square strides are
      # required here.
      assert stride_height == stride_width, (
          '--ml_perf_compliance_logging does not support convolutions where '
          'the stride height is not equal to the stride width. '
          'stride_height=%d, stride_width=%d' % (stride_height, stride_width))
      # Map the TF initializer object to the compliance tag / name it expects.
      if isinstance(initializer, tf.truncated_normal_initializer) or (
          isinstance(initializer, tf.variance_scaling_initializer) and
          initializer.distribution == 'truncated_normal'):
        initializer = tags.TRUNCATED_NORMAL
      elif (isinstance(initializer, tf.glorot_uniform_initializer) or
            initializer is None):
        # None is TF's default conv2d initializer, which is glorot uniform.
        initializer = 'glorot_uniform'
      resnet_log_helper.log_conv2d(input_tensor, output_tensor, stride_width,
                                   filters, initializer, use_bias)

  def log_batch_norm(self, input_tensor, output_tensor, momentum, epsilon,
                     center, scale, training):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_batch_norm(input_tensor, output_tensor, momentum,
                                       epsilon, center, scale, training)

  def log_train_epochs(self, num_epochs):
    """Logs all the TRAIN_EPOCHs log lines."""
    num_epochs_int = int(num_epochs)
    for i in range(num_epochs_int):
      # MLPerf allows us to print all the train epochs at once instead of
      # printing them as we do them.
      self.log(key=mlperf_log.TRAIN_EPOCH, value=i, stack_offset=3)
    if num_epochs_int != num_epochs:
      # The fractional final epoch is logged with an explanatory suffix.
      value = (str(num_epochs_int) +
               ', but this epoch only has {}% of the examples of a normal epoch'
               .format(100 * (num_epochs - num_epochs_int)))
      self.log(key=mlperf_log.TRAIN_EPOCH, value=value, stack_offset=3)

  def log_input_resize_aspect_preserving(self, height, width, scale_factor):
    assert height == width, (
        '--ml_perf_compliance_logging does not support models with nonsquare '
        'images. Cannot process image with height=%d and width=%d' %
        (height, width))
    self.log(key=tags.INPUT_RESIZE_ASPECT_PRESERVING,
             value={'min': int(height * scale_factor)})

  def log_eval_epoch(self, tag, global_step, batch_size, stack_offset=2):
    if self.model == 'resnet50_v1.5':
      self.log(key=tag, stack_offset=stack_offset + 1)
    elif self.model == 'ssd300':
      # 118287 is presumably the number of examples per epoch for the COCO
      # train set — TODO confirm.
      epoch = int(global_step * batch_size / 118287)
      self.log(key=tag, value=epoch, stack_offset=stack_offset + 1)

  def log_eval_accuracy(self, accuracy, global_step, batch_size,
                        examples_per_epoch, stack_offset=2):
    """Logs eval accuracy."""
    epoch = int(global_step * batch_size / examples_per_epoch)
    eval_accuracy = {'epoch': epoch, 'value': accuracy}
    eval_iteration_accuracy = {'iteration': global_step, 'value': accuracy}
    self.log(key=tags.EVAL_ACCURACY, value=eval_accuracy,
             stack_offset=stack_offset + 1)
    self.log(key=tags.EVAL_ITERATION_ACCURACY, value=eval_iteration_accuracy,
             stack_offset=stack_offset + 1)
def
_empty_fn
(
*
args
,
**
kwargs
):
del
args
,
kwargs
class NullMlPerfLogger(object):
  """A version of `MlPerfLogger` that does not log anything.

  This class has the same interface as `MlPerfLogger`, but does not actually
  do anything. This is used when logging is disabled, which is the default
  behavior.
  """

  def __getattr__(self, item):
    # Any logging method name resolves to the shared do-nothing function.
    del item
    return _empty_fn

  def log_deferred_tensor_value(self, key, tensor_value, *args, **kwargs):
    """Passes `tensor_value` through unchanged, logging nothing."""
    del key, args, kwargs
    return tensor_value
# A global singleton logger. By default, it's the null logger but can be
# switched to an MlPerfLogger with `mlperf_logger()`.
logger = NullMlPerfLogger()
@contextlib.contextmanager
def mlperf_logger(use_mlperf_logger, model):
  """Optionally enable the mlperf logger.

  If `use_mlperf_logger` is True, sets the `logger` global variable to an
  instance of MlPerfLogger that will print logs for MLPerf compliance. If
  `use_mlperf_logger` is False, does nothing.

  Args:
    use_mlperf_logger: If True, enables the mlperf logger. If False, this
      function does nothing.
    model: The model that will be logged. Required, because different models
      must log different things for MLPerf compliance.

  Yields:
    Nothing.

  Raises:
    ImportError: If `use_mlperf_logger` is True but the MLPerf compliance
      library cannot be imported
  """
  global logger
  # Guard clause: with logging disabled, run the body with the logger
  # untouched.
  if not use_mlperf_logger:
    yield
    return
  if not import_successful:
    raise ImportError('Failed to import MLPerf compliance library, which is '
                      'required when --ml_perf_compliance_logging is '
                      'specified. Clone this repo and add this directory '
                      'https://github.com/mlperf/training/tree/master/'
                      'compliance to the PYTHONPATH environmental variable.')
  # Swap in a real compliance logger for the duration of the context, then
  # restore whatever logger was installed before, even on error.
  previous_logger = logger
  logger = MlPerfLogger(model)
  try:
    yield
  finally:
    logger = previous_logger
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf_test.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains tests related to MLPerf.
Note this test only passes if the MLPerf compliance library is installed.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
Counter
import
logging
import
re
import
six
import
tensorflow.compat.v1
as
tf
import
benchmark_cnn
import
datasets
import
mlperf
import
test_util
from
models
import
model
from
mlperf_compliance
import
mlperf_log
class _MlPerfTestModel(model.CNNModel):
  """A minimal CNN used to exercise the MLPerf compliance logging."""

  def __init__(self):
    super(_MlPerfTestModel, self).__init__(
        'mlperf_test_model', image_size=224, batch_size=2, learning_rate=1)

  def add_inference(self, cnn):
    assert cnn.top_layer.shape[1:] == (3, 224, 224)
    cnn.conv(1, 1, 1, 1, 1, use_batch_norm=True)
    cnn.mpool(1, 1, 1, 1, num_channels_in=1)
    cnn.reshape([-1, 224 * 224])
    cnn.affine(1, activation=None)

    # The batch norm variables must be excluded from the L2 loss, so the
    # filtered variable list must be strictly smaller than the full one.
    all_vars = tf.global_variables() + tf.local_variables()
    assert len(all_vars) > len(self.filter_l2_loss_vars(all_vars))
class MlPerfComplianceTest(tf.test.TestCase):
  """Tests the MLPerf compliance logs.

  This serves as a quick check that we probably didn't break the compliance
  logging. It is not meant to be as comprehensive as the official MLPerf
  compliance checker will be.
  """

  def setUp(self):
    super(MlPerfComplianceTest, self).setUp()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  # Map between regex and the number of times we expect to see that regex in
  # the logs. Entries commented out with the comment FIXME indicate that
  # tf_cnn_benchmarks currently fails compliance in that regard, and needs to
  # be fixed to be MLPerf compliant.
  EXPECTED_LOG_REGEXES = {
      # Preprocessing tags
      mlperf.tags.INPUT_ORDER: 2,  # 1 for training, 1 for eval
      # We pass --tf_random_seed=9876 in the test.
      r'%s: 9876' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      # The Numpy random seed is hardcoded to 4321.
      r'%s: 4321' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_TRAIN_EXAMPLES,
                   datasets.IMAGENET_NUM_TRAIN_IMAGES): 1,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES,
                   datasets.IMAGENET_NUM_VAL_IMAGES): 1,
      mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS + '.*': 1,
      mlperf.tags.INPUT_RANDOM_FLIP + '.*': 1,
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_CENTRAL_CROP: 1,
      r'%s: \[123.68, 116.78, 103.94\].*' %
      mlperf.tags.INPUT_MEAN_SUBTRACTION: 2,
      r'%s: {"min": 256}.*' % mlperf.tags.INPUT_RESIZE_ASPECT_PRESERVING: 1,
      # 1 for training, 1 for eval
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_RESIZE: 2,

      # Resnet model tags
      mlperf.tags.MODEL_HP_BATCH_NORM + '.*': 2,
      # 2 for training, 2 for eval. Although there's only 1 conv2d, each conv2d
      # produces 2 logs.
      mlperf.tags.MODEL_HP_CONV2D_FIXED_PADDING + '.*': 4,
      mlperf.tags.MODEL_HP_RELU + '.*': 2,
      mlperf.tags.MODEL_HP_INITIAL_MAX_POOL + '.*': 2,
      # NOTE: this entry was listed twice in the original dict; duplicate
      # literal dict keys silently collapse to one, so only one is kept.
      mlperf.tags.MODEL_HP_DENSE + '.*': 4,
      # Note that tags our test model does not emit, like MODEL_HP_SHORTCUT_ADD,
      # are omitted here.
      r'%s: "categorical_cross_entropy".*' % mlperf.tags.MODEL_HP_LOSS_FN: 1,

      # 1 for training, 2 because the _MlPerfTestModel calls this when building
      # the model for both training and eval
      r'%s: true' % mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2: 3,

      r'%s: 0.5.*' % mlperf.tags.MODEL_L2_REGULARIZATION: 1,

      # Note we do not handle OPT_LR, since that is printed to stderr using
      # tf.Print, which we cannot easily intercept.

      # Other tags
      '%s: "%s"' % (mlperf.tags.OPT_NAME, mlperf.tags.SGD_WITH_MOMENTUM): 1,
      '%s: 0.5' % mlperf.tags.OPT_MOMENTUM: 1,
      mlperf.tags.RUN_START: 1,
      '%s: 2' % mlperf.tags.INPUT_BATCH_SIZE: 1,
      mlperf.tags.TRAIN_LOOP: 1,
      mlperf.tags.TRAIN_EPOCH + '.*': 1,
      '%s: 2' % mlperf.tags.INPUT_SIZE: 2,
      mlperf.tags.EVAL_START: 2,
      mlperf.tags.EVAL_STOP: 2,
      '%s: 6' % mlperf.tags.EVAL_SIZE: 2,
      mlperf.tags.EVAL_ACCURACY + '.*': 2,
      '%s: 2.0' % mlperf.tags.EVAL_TARGET: 2,
      mlperf.tags.RUN_STOP + '.*': 1,
      mlperf.tags.RUN_FINAL: 1
  }
  # Compile the patterns once; the Counter form lets the test diff found
  # counts against expected counts directly.
  EXPECTED_LOG_REGEXES = Counter({re.compile(k): v
                                  for k, v in EXPECTED_LOG_REGEXES.items()})

  def testMlPerfCompliance(self):
    """Runs a tiny training+eval job and checks the emitted MLPerf logs."""
    string_io = six.StringIO()
    handler = logging.StreamHandler(string_io)
    data_dir = test_util.create_black_and_white_images()
    try:
      mlperf_log.LOGGER.addHandler(handler)
      params = benchmark_cnn.make_params(data_dir=data_dir,
                                         data_name='imagenet',
                                         batch_size=2,
                                         num_warmup_batches=0,
                                         num_batches=2,
                                         num_eval_batches=3,
                                         eval_during_training_every_n_steps=1,
                                         distortions=False,
                                         weight_decay=0.5,
                                         optimizer='momentum',
                                         momentum=0.5,
                                         stop_at_top_1_accuracy=2.0,
                                         tf_random_seed=9876,
                                         ml_perf=True)
      with mlperf.mlperf_logger(use_mlperf_logger=True, model='resnet50_v1.5'):
        bench_cnn = benchmark_cnn.BenchmarkCNN(params,
                                               model=_MlPerfTestModel())
        bench_cnn.run()
      logs = string_io.getvalue().splitlines()

      # Count how many log lines each expected regex matched.
      log_regexes = Counter()
      for log in logs:
        for regex in self.EXPECTED_LOG_REGEXES:
          if regex.search(log):
            log_regexes[regex] += 1
      if log_regexes != self.EXPECTED_LOG_REGEXES:
        diff_counter = Counter(log_regexes)
        diff_counter.subtract(self.EXPECTED_LOG_REGEXES)
        differences = []
        for regex in (k for k in diff_counter.keys() if diff_counter[k]):
          found_count = log_regexes[regex]
          expected_count = self.EXPECTED_LOG_REGEXES[regex]
          differences.append(' For regex %s: Found %d lines matching but '
                             'expected to find %d' %
                             (regex.pattern, found_count, expected_count))
        raise AssertionError('Logs did not match expected logs. Differences:\n'
                             '%s' % '\n'.join(differences))
    finally:
      mlperf_log.LOGGER.removeHandler(handler)
if __name__ == '__main__':
  # `tf` is tensorflow.compat.v1 here; disable TF2 behaviors so the
  # TF1-style benchmark code under test runs as intended.
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/__init__.py
0 → 100644
View file @
f0d87682
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/alexnet_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Alexnet model configuration.
References:
Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton
ImageNet Classification with Deep Convolutional Neural Networks
Advances in Neural Information Processing Systems. 2012
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow.compat.v1
as
tf
from
models
import
model
class AlexnetModel(model.CNNModel):
  """AlexNet CNN model (Krizhevsky et al., 2012)."""

  def __init__(self, params=None):
    # Image size is 224 + 3: the first 'VALID' conv below assumes the input
    # was padded by 3 pixels in width and height.
    super(AlexnetModel, self).__init__(
        'alexnet', 224 + 3, 512, 0.005, params=params)

  def add_inference(self, cnn):
    # Feature extractor: five convolutions interleaved with max pooling.
    # Note: VALID requires padding the images by 3 in width and height
    cnn.conv(64, 11, 11, 4, 4, 'VALID')
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(192, 5, 5)
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(384, 3, 3)
    cnn.conv(384, 3, 3)
    cnn.conv(256, 3, 3)
    cnn.mpool(3, 3, 2, 2)
    # Classifier head: flatten, then two dropout-regularized FC layers.
    cnn.reshape([-1, 256 * 6 * 6])
    for _ in range(2):
      cnn.affine(4096)
      cnn.dropout()
class AlexnetCifar10Model(model.CNNModel):
  """Alexnet cnn model for cifar datasets.

  The model architecture follows the one defined in the tensorflow tutorial
  model.
  Reference model: tensorflow/models/tutorials/image/cifar10/cifar10.py
  Paper: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf
  """

  def __init__(self, params=None):
    super(AlexnetCifar10Model, self).__init__(
        'alexnet', 32, 128, 0.1, params=params)

  def add_inference(self, cnn):
    # First stage: conv -> pool -> local response normalization.
    cnn.conv(64, 5, 5, 1, 1, 'SAME', stddev=5e-2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    # Second stage: conv -> local response normalization -> pool.
    cnn.conv(64, 5, 5, 1, 1, 'SAME', bias=0.1, stddev=5e-2)
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # Flatten whatever spatial shape remains and attach two FC layers.
    dims = cnn.top_layer.get_shape().as_list()
    flattened = dims[1] * dims[2] * dims[3]
    cnn.reshape([-1, flattened])
    cnn.affine(384, stddev=0.04, bias=0.1)
    cnn.affine(192, stddev=0.04, bias=0.1)

  def get_learning_rate(self, global_step, batch_size):
    """Decays the learning rate by 10x every 100 epochs, stepwise."""
    examples_per_epoch = 50000  # CIFAR training-set size used by this model.
    epochs_per_decay = 100
    steps_per_decay = epochs_per_decay * examples_per_epoch // batch_size
    return tf.train.exponential_decay(
        self.learning_rate, global_step, steps_per_decay, 0.1,
        staircase=True)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/densenet_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Densenet model configuration.
References:
"Densely Connected Convolutional Networks": https://arxiv.org/pdf/1608.06993
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
import
tensorflow.compat.v1
as
tf
from
models
import
model
as
model_lib
class DensenetCifar10Model(model_lib.CNNModel):
  """Densenet cnn network configuration."""

  def __init__(self, model, layer_counts, growth_rate, params=None):
    """Creates the model.

    Args:
      model: the model name, e.g. 'densenet40_k12'.
      layer_counts: a 3-tuple with the number of dense layers in each of the
        three dense blocks.
      growth_rate: the number of channels each dense layer adds.
      params: the params from BenchmarkCNN.
    """
    self.growth_rate = growth_rate
    super(DensenetCifar10Model, self).__init__(
        model, 32, 64, 0.1, layer_counts=layer_counts, params=params)
    self.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}

  def dense_block(self, cnn, growth_rate):
    """Adds one BN -> ReLU -> 3x3 conv layer, concatenated with its input."""
    input_layer = cnn.top_layer
    c = cnn.batch_norm(input_layer, **self.batch_norm_config)
    c = tf.nn.relu(c)
    c = cnn.conv(growth_rate, 3, 3, 1, 1,
                 stddev=np.sqrt(2.0 / 9 / growth_rate),
                 activation=None, input_layer=c)
    channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
    # Dense connectivity: the new features are appended to the input along
    # the channel axis.
    cnn.top_layer = tf.concat([input_layer, c], channel_index)
    cnn.top_size += growth_rate

  def transition_layer(self, cnn):
    """Adds a BN -> ReLU -> 1x1 conv -> 2x2 average-pool transition."""
    in_size = cnn.top_size
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    cnn.conv(in_size, 1, 1, 1, 1, stddev=np.sqrt(2.0 / 9 / in_size))
    cnn.apool(2, 2, 2, 2)

  def add_inference(self, cnn):
    if self.layer_counts is None:
      raise ValueError('Layer counts not specified for %s' % self.get_model())
    if self.growth_rate is None:
      raise ValueError('Growth rate not specified for %s' % self.get_model())
    cnn.conv(16, 3, 3, 1, 1, activation=None)
    # Block 1
    for _ in xrange(self.layer_counts[0]):
      self.dense_block(cnn, self.growth_rate)
    self.transition_layer(cnn)
    # Block 2
    for _ in xrange(self.layer_counts[1]):
      self.dense_block(cnn, self.growth_rate)
    self.transition_layer(cnn)
    # Block 3 (no transition layer afterwards).
    for _ in xrange(self.layer_counts[2]):
      self.dense_block(cnn, self.growth_rate)
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
    cnn.top_size = cnn.top_layer.get_shape().as_list()[channel_index]
    cnn.spatial_mean()

  def get_learning_rate(self, global_step, batch_size):
    """Piecewise-constant LR schedule dropping 10x at epochs 150/225/300."""
    num_batches_per_epoch = 50000 // batch_size
    boundaries = num_batches_per_epoch * np.array([150, 225, 300],
                                                  dtype=np.int64)
    # tf.train.piecewise_constant wants a list of scalars, not an ndarray.
    # (This replaces the unidiomatic `[x for x in boundaries]` copy.)
    boundaries = list(boundaries)
    values = [0.1, 0.01, 0.001, 0.0001]
    return tf.train.piecewise_constant(global_step, boundaries, values)
def create_densenet40_k12_model():
  """Builds the DenseNet-40 (growth rate 12) CIFAR-10 model."""
  return DensenetCifar10Model('densenet40_k12', layer_counts=(12, 12, 12),
                              growth_rate=12)
def create_densenet100_k12_model():
  """Builds the DenseNet-100 (growth rate 12) CIFAR-10 model."""
  return DensenetCifar10Model('densenet100_k12', layer_counts=(32, 32, 32),
                              growth_rate=12)
def create_densenet100_k24_model():
  """Builds the DenseNet-100 (growth rate 24) CIFAR-10 model."""
  return DensenetCifar10Model('densenet100_k24', layer_counts=(32, 32, 32),
                              growth_rate=24)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/__init__.py
0 → 100644
View file @
f0d87682
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DeepSpeech2 model configuration.
References:
https://arxiv.org/abs/1512.02595
Deep Speech 2: End-to-End Speech Recognition in English and Mandarin
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
itertools
import
numpy
as
np
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
import
tensorflow.compat.v1
as
tf
import
constants
from
cnn_util
import
log_fn
from
models
import
model
as
model_lib
from
tensorflow.python.ops
import
variables
# pylint: disable=g-direct-tensorflow-import
class DeepSpeechDecoder(object):
  """Greedy decoder implementation for Deep Speech model."""

  def __init__(self, labels, blank_index=28):
    """Decoder initialization.

    Args:
      labels: a string specifying the speech labels for the decoder to use.
      blank_index: an integer specifying index for the blank character.
        Defaults to 28.
    """
    self.labels = labels
    self.blank_index = blank_index
    # Maps each label index to its character, e.g. {0: labels[0], ...}.
    self.int_to_char = dict(enumerate(labels))

  def convert_to_string(self, sequence):
    """Convert a sequence of indexes into corresponding string."""
    return ''.join(self.int_to_char[i] for i in sequence)

  def _nltk_distance(self):
    """Imports and returns `nltk.metrics.distance`.

    Raises:
      ImportError: with installation instructions if nltk is missing, or the
        original error if the failure was unrelated to nltk.
    """
    try:
      from nltk.metrics import distance  # pylint: disable=g-import-not-at-top
      return distance
    except ImportError as e:
      # Python 3 ImportError has no `.message` attribute (the original code
      # used `e.message` and would crash with AttributeError here); inspect
      # str(e) instead to decide whether nltk itself is missing.
      if 'nltk' not in str(e):
        raise
      raise ImportError('To use the experimental deepspeech model, you must '
                        'pip install -U nltk')

  def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences
    after tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    distance = self._nltk_distance()
    # Map each distinct word to a single character, so the word-level edit
    # distance can be computed by a character-level algorithm.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))
    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]
    return distance.edit_distance(''.join(new_decode), ''.join(new_target))

  def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    distance = self._nltk_distance()
    return distance.edit_distance(decode, target)

  def decode(self, char_indexes):
    """Decode the best guess from a sequence of indexes, greedily."""
    # Merge repeated chars.
    merge = [k for k, _ in itertools.groupby(char_indexes)]
    # Remove the blank index in the decoded sequence.
    merge_remove_blank = [k for k in merge if k != self.blank_index]
    return self.convert_to_string(merge_remove_blank)

  def decode_logits(self, logits):
    """Decode the best guess from logits using greedy algorithm."""
    # Choose the class with maximum probability at each time step.
    best = list(np.argmax(logits, axis=1))
    return self.decode(best)
class DeepSpeech2Model(model_lib.Model):
  """Define DeepSpeech2 model."""

  # Supported rnn cells.
  SUPPORTED_RNNS = {
      'lstm': tf.nn.rnn_cell.BasicLSTMCell,
      'rnn': tf.nn.rnn_cell.RNNCell,
      'gru': tf.nn.rnn_cell.GRUCell,
  }

  # Parameters for batch normalization.
  BATCH_NORM_EPSILON = 1e-5
  BATCH_NORM_DECAY = 0.997

  # Filters of convolution layer
  CONV_FILTERS = 32

  def __init__(self,
               num_rnn_layers=5,
               rnn_type='lstm',
               is_bidirectional=True,
               rnn_hidden_size=800,
               use_bias=True,
               params=None):
    """Initialize DeepSpeech2 model.

    Args:
      num_rnn_layers: an integer, the number of rnn layers (default: 5).
      rnn_type: a string, one of the supported rnn cells: gru, rnn or lstm.
      is_bidirectional: a boolean to indicate if the rnn layer is
        bidirectional.
      rnn_hidden_size: an integer for the number of hidden units in the RNN
        cell.
      use_bias: a boolean specifying whether to use a bias in the last fc
        layer.
      params: the params from BenchmarkCNN.
    """
    super(DeepSpeech2Model, self).__init__(
        'deepspeech2',
        batch_size=128,
        learning_rate=0.0005,
        fp16_loss_scale=128,
        params=params)
    self.num_rnn_layers = num_rnn_layers
    self.rnn_type = rnn_type
    self.is_bidirectional = is_bidirectional
    self.rnn_hidden_size = rnn_hidden_size
    self.use_bias = use_bias
    # Fixed padded-input geometry; presumably derived from the LibriSpeech
    # preprocessing used with this model — confirm against the input pipeline.
    self.num_feature_bins = 161
    self.max_time_steps = 3494
    self.max_label_length = 576

  def _batch_norm(self, inputs, training):
    """Batch normalization layer.

    Note that the momentum to use will affect validation accuracy over time.
    Batch norm has different behaviors during training/evaluation. With a
    large momentum, the model takes longer to get a near-accurate estimation
    of the moving mean/variance over the entire training dataset, which means
    we need more iterations to see good evaluation results. If the training
    data is evenly distributed over the feature space, we can also try setting
    a smaller momentum (such as 0.1) to get good evaluation result sooner.

    Args:
      inputs: input data for batch norm layer.
      training: a boolean to indicate if it is in training stage.

    Returns:
      tensor output from batch norm layer.
    """
    return tf.layers.batch_normalization(
        inputs=inputs,
        momentum=DeepSpeech2Model.BATCH_NORM_DECAY,
        epsilon=DeepSpeech2Model.BATCH_NORM_EPSILON,
        fused=True,
        training=training)

  def _conv_bn_layer(self, inputs, padding, filters, kernel_size, strides,
                     layer_id, training):
    """Defines 2D convolutional + batch normalization layer.

    Args:
      inputs: input data for convolution layer.
      padding: padding to be applied before convolution layer.
      filters: an integer, number of output filters in the convolution.
      kernel_size: a tuple specifying the height and width of the 2D
        convolution window.
      strides: a tuple specifying the stride length of the convolution.
      layer_id: an integer specifying the layer index.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output from the current layer.
    """
    # Perform symmetric padding on the feature dimension of time_step
    # This step is required to avoid issues when RNN output sequence is
    # shorter than the label length.
    inputs = tf.pad(
        inputs,
        [[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]])
    inputs = tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='valid',
        use_bias=False,
        activation=tf.nn.relu6,
        name='cnn_{}'.format(layer_id))
    return self._batch_norm(inputs, training)

  def _rnn_layer(self, inputs, rnn_cell, rnn_hidden_size, layer_id,
                 use_batch_norm, is_bidirectional, training):
    """Defines a batch normalization + rnn layer.

    Args:
      inputs: input tensors for the current layer.
      rnn_cell: RNN cell instance to use.
      rnn_hidden_size: an integer for the dimensionality of the rnn output
        space.
      layer_id: an integer for the index of current layer.
      use_batch_norm: a boolean specifying whether to perform batch
        normalization on input states.
      is_bidirectional: a boolean specifying whether the rnn layer is
        bi-directional.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output for the current layer.
    """
    if use_batch_norm:
      inputs = self._batch_norm(inputs, training)

    # Construct forward/backward RNN cells.
    fw_cell = rnn_cell(num_units=rnn_hidden_size,
                       name='rnn_fw_{}'.format(layer_id))
    if is_bidirectional:
      bw_cell = rnn_cell(num_units=rnn_hidden_size,
                         name='rnn_bw_{}'.format(layer_id))
      outputs, _ = tf.nn.bidirectional_dynamic_rnn(
          cell_fw=fw_cell,
          cell_bw=bw_cell,
          inputs=inputs,
          dtype=tf.float32,
          swap_memory=True)
      rnn_outputs = tf.concat(outputs, -1)
    else:
      # BUG FIX: tf.nn.dynamic_rnn returns an (outputs, state) pair. The
      # original code assigned the whole tuple to rnn_outputs, which breaks
      # the unidirectional (is_bidirectional=False) path downstream; keep
      # only the outputs, mirroring the bidirectional branch.
      rnn_outputs, _ = tf.nn.dynamic_rnn(
          fw_cell, inputs, dtype=tf.float32, swap_memory=True)
    return rnn_outputs

  def get_input_data_types(self, subset):
    """Returns the list of data types of the inputs."""
    del subset  # Same data types for both train and validation subsets.
    return [self.data_type, tf.int32, tf.int32, tf.int32]

  def get_input_shapes(self, subset):
    """Returns the list of shapes of the padded inputs."""
    del subset  # Same shapes for both train and validation subsets
    return [
        [self.batch_size, self.max_time_steps, self.num_feature_bins, 1],
        [self.batch_size, self.max_label_length],
        [self.batch_size, 1],
        [self.batch_size, 1],
    ]

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns synthetic [spectrogram, labels, input/label lengths] inputs."""
    inputs = tf.random_uniform(self.get_input_shapes('train')[0],
                               dtype=self.get_input_data_types('train')[0])
    inputs = variables.VariableV1(inputs,
                                  trainable=False,
                                  collections=[tf.GraphKeys.LOCAL_VARIABLES],
                                  name=input_name)
    labels = tf.convert_to_tensor(
        np.random.randint(28, size=[self.batch_size, self.max_label_length]))
    input_lengths = tf.convert_to_tensor(
        [self.max_time_steps] * self.batch_size)
    label_lengths = tf.convert_to_tensor(
        [self.max_label_length] * self.batch_size)
    return [inputs, labels, input_lengths, label_lengths]

  # TODO(laigd): support fp16.
  # TODO(laigd): support multiple gpus.
  def build_network(self, inputs, phase_train=True, nclass=29):
    """Builds the forward pass of the deepspeech2 model.

    Args:
      inputs: The input list of the model.
      phase_train: True during training. False during evaluation.
      nclass: Number of classes that the input spectrogram can belong to.

    Returns:
      A BuildNetworkResult which contains the logits and model-specific extra
      information.
    """
    inputs = inputs[0]  # Get the spectrogram feature.

    # Two cnn layers.
    inputs = self._conv_bn_layer(
        inputs,
        padding=(20, 5),
        filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(41, 11),
        strides=(2, 2),
        layer_id=1,
        training=phase_train)
    inputs = self._conv_bn_layer(
        inputs,
        padding=(10, 5),
        filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(21, 11),
        strides=(2, 1),
        layer_id=2,
        training=phase_train)

    # output of conv_layer2 with the shape of
    # [batch_size (N), times (T), features (F), channels (C)].
    # Convert the conv output to rnn input.
    # batch_size = tf.shape(inputs)[0]
    feat_size = inputs.get_shape().as_list()[2]
    inputs = tf.reshape(
        inputs,
        [self.batch_size, -1, feat_size * DeepSpeech2Model.CONV_FILTERS])

    # RNN layers.
    rnn_cell = DeepSpeech2Model.SUPPORTED_RNNS[self.rnn_type]
    for layer_counter in xrange(self.num_rnn_layers):
      # No batch normalization on the first layer.
      use_batch_norm = (layer_counter != 0)
      inputs = self._rnn_layer(inputs, rnn_cell, self.rnn_hidden_size,
                               layer_counter + 1, use_batch_norm,
                               self.is_bidirectional, phase_train)

    # FC layer with batch norm.
    inputs = self._batch_norm(inputs, phase_train)
    logits = tf.layers.dense(inputs, nclass, use_bias=self.use_bias)

    return model_lib.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    """Computes the ctc loss for the current batch of predictions.

    Args:
      inputs: the input list of the model.
      build_network_result: a BuildNetworkResult returned by build_network().

    Returns:
      The loss tensor of the model.
    """
    logits = build_network_result.logits
    actual_time_steps = inputs[2]
    probs = tf.nn.softmax(logits)
    ctc_time_steps = tf.shape(probs)[1]
    # Scale each example's true length to the (downsampled) CTC time axis.
    ctc_input_length = tf.to_float(
        tf.multiply(actual_time_steps, ctc_time_steps))
    ctc_input_length = tf.to_int32(
        tf.floordiv(ctc_input_length, tf.to_float(self.max_time_steps)))

    label_length = inputs[3]
    label_length = tf.to_int32(tf.squeeze(label_length))
    ctc_input_length = tf.to_int32(tf.squeeze(ctc_input_length))

    labels = inputs[1]
    sparse_labels = tf.to_int32(
        tf.keras.backend.ctc_label_dense_to_sparse(labels, label_length))
    # ctc_loss expects time-major log-probabilities.
    y_pred = tf.log(
        tf.transpose(probs, perm=[1, 0, 2]) + tf.keras.backend.epsilon())

    losses = tf.expand_dims(
        tf.nn.ctc_loss(
            labels=sparse_labels,
            inputs=y_pred,
            sequence_length=ctc_input_length,
            ignore_longer_outputs_than_inputs=True),
        axis=1)
    loss = tf.reduce_mean(losses)
    return loss

  # Names under which the eval ops are exported to postprocess().
  PROBABILITY_TENSOR = 'deepspeech2_prob'
  LABEL_TENSOR = 'deepspeech2_label'

  def accuracy_function(self, inputs, logits):
    """Returns the ops to evaluate the model performance."""
    # Get probabilities of each predicted class
    probs = tf.nn.softmax(logits)
    assert probs.shape.as_list()[0] == self.batch_size
    return {
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.PROBABILITY_TENSOR):
            probs,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.LABEL_TENSOR):
            inputs[1],
    }

  def postprocess(self, results):
    """Postprocess results returned from model in Python."""
    probs = results[self.PROBABILITY_TENSOR]

    total_wer, total_cer = 0, 0
    speech_labels = " abcdefghijklmnopqrstuvwxyz'-"
    greedy_decoder = DeepSpeechDecoder(speech_labels)

    # Evaluate the performance using WER (Word Error Rate) and CER (Character
    # Error Rate) as metrics.
    targets = results[self.LABEL_TENSOR]  # The ground truth transcript
    for i in range(self.batch_size):
      # Decode string.
      predicted_str = greedy_decoder.decode_logits(probs[i])
      expected_str = greedy_decoder.decode(targets[i])
      # Compute CER.
      total_cer += (greedy_decoder.cer(predicted_str, expected_str) /
                    len(expected_str))
      # Compute WER.
      total_wer += (greedy_decoder.wer(predicted_str, expected_str) /
                    len(expected_str.split()))

    # Get mean value
    total_cer /= self.batch_size
    total_wer /= self.batch_size

    log_fn('total CER: {:f}; total WER: {:f}; total example: {:d}.'.format(
        total_cer, total_wer, self.batch_size))
    # TODO(laigd): get rid of top_N_accuracy bindings in benchmark_cnn.py
    return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/official_ncf_model.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrap the official recommendation model in a tf_cnn_benchmarks Model.
This allows the recommendation NCF model to be used in tf_cnn_benchmarks.
Currently, the implementation is fairly hacky, because tf_cnn_benchmarks is
intended to be used only with CNNs.
Only synthetic data with 1 GPU is currently supported.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow.compat.v1
as
tf
from
models
import
model
# Obtained by running the official NCF model with the following command:
#     python ncf_main.py --dataset ml-20m
# and printing the number of users and items here:
# https://github.com/tensorflow/models/blob/d089975f630a8a01be63e45ef08a31be14bb96b4/official/recommendation/data_preprocessing.py#L68
# Distinct user and item counts for the MovieLens 20M dataset.
_NUM_USERS_20M = 138493
_NUM_ITEMS_20M = 26744
# TODO(reedwm): Support multi-GPU. Currently keras layers, which this model
# uses, ignore variable_scopes, which we rely on for multi-GPU support.
# TODO(reedwm): Support real data. This will require a significant refactor.
# TODO(reedwm): All-reduce IndexedSlices more effectively.
# TODO(reedwm): Support the 1M variant of this model.
class NcfModel(model.Model):
  r"""A model.Model wrapper around the official NCF recommendation model.

  To do an NCF run with synthetic data that roughly matches what the official
  model does, run:

  python tf_cnn_benchmarks.py --optimizer=adam --model=ncf --batch_size=65536 \
      --weight_decay=0 --sparse_to_dense_grads
  """

  def __init__(self, params=None):
    super(NcfModel, self).__init__(
        'official_ncf', batch_size=2048, learning_rate=0.0005,
        fp16_loss_scale=128, params=params)
    if self.fp16_vars:
      raise ValueError('NCF model only supports float32 variables for now.')

  def build_network(self, inputs, phase_train=True, nclass=1001):
    """Builds the NCF graph via the official neumf_model implementation.

    Args:
      inputs: A 3-element list of (users, items, labels) tensors; the labels
        element is not used here (it is consumed by loss_function).
      phase_train: Unused by this model.
      nclass: Unused; present for interface compatibility with other models.

    Returns:
      A model.BuildNetworkResult whose logits are always float32.

    Raises:
      ImportError: If the official models repo is not on the PYTHONPATH.
    """
    try:
      from official.recommendation import neumf_model  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      # Fix: Python 3 exceptions have no `.message` attribute, so the original
      # `e.message` access raised AttributeError instead of the intended
      # friendly ImportError. str(e) works on both Python 2 and 3.
      if 'neumf_model' not in str(e):
        raise
      raise ImportError('To use the experimental NCF model, you must clone the '
                        'repo https://github.com/tensorflow/models and add '
                        'tensorflow/models to the PYTHONPATH.')
    del nclass
    users, items, _ = inputs
    params = {
        'num_users': _NUM_USERS_20M,
        'num_items': _NUM_ITEMS_20M,
        'model_layers': (256, 256, 128, 64),
        'mf_dim': 64,
        'mf_regularization': 0,
        'mlp_reg_layers': (0, 0, 0, 0),
        'use_tpu': False
    }
    user_input = tf.keras.layers.Input(tensor=users, name='user_input')
    item_input = tf.keras.layers.Input(tensor=items, name='item_input')
    if self.data_type == tf.float32:
      keras_model = neumf_model.construct_model(user_input, item_input, params)
      logits = keras_model.output
    else:
      assert self.data_type == tf.float16
      old_floatx = tf.keras.backend.floatx()
      try:
        tf.keras.backend.set_floatx('float16')
        # We cannot rely on the variable_scope's fp16 custom getter here,
        # because the NCF model uses keras layers, which ignore variable
        # scopes. So we use a variable_creator_scope instead.
        with tf.variable_creator_scope(_fp16_variable_creator):
          keras_model = neumf_model.construct_model(user_input, item_input,
                                                    params)
        logits = tf.cast(keras_model.output, tf.float32)
      finally:
        # Always restore the global floatx, even if model construction fails.
        tf.keras.backend.set_floatx(old_floatx)
    return model.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    """Returns the cross-entropy loss; inputs[2] holds the 0/1 labels."""
    logits = build_network_result.logits

    # Softmax with the first column of ones is equivalent to sigmoid.
    # TODO(reedwm): Actually, the first column should be zeros to be equivalent
    # to sigmoid. But, we keep it at ones to match the official models.
    logits = tf.concat([tf.ones(logits.shape, dtype=logits.dtype), logits],
                       axis=1)
    return tf.losses.sparse_softmax_cross_entropy(
        labels=inputs[2], logits=logits)

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns the ops to generate synthetic inputs and labels."""
    def users_init_val():
      return tf.random_uniform((self.batch_size, 1), minval=0,
                               maxval=_NUM_USERS_20M, dtype=tf.int32)
    users = tf.Variable(users_init_val, dtype=tf.int32, trainable=False,
                        collections=[tf.GraphKeys.LOCAL_VARIABLES],
                        name='synthetic_users')

    def items_init_val():
      return tf.random_uniform((self.batch_size, 1), minval=0,
                               maxval=_NUM_ITEMS_20M, dtype=tf.int32)
    items = tf.Variable(items_init_val, dtype=tf.int32, trainable=False,
                        collections=[tf.GraphKeys.LOCAL_VARIABLES],
                        name='synthetic_items')

    def labels_init_val():
      return tf.random_uniform((self.batch_size,), minval=0, maxval=2,
                               dtype=tf.int32)
    labels = tf.Variable(labels_init_val, dtype=tf.int32, trainable=False,
                         collections=[tf.GraphKeys.LOCAL_VARIABLES],
                         name='synthetic_labels')
    return [users, items, labels]

  def get_input_shapes(self, subset):
    """Returns the shapes of the (users, items, labels) inputs."""
    del subset
    return [[self.batch_size, 1], [self.batch_size, 1], [self.batch_size]]

  def get_input_data_types(self, subset):
    """Returns the dtypes of the (users, items, labels) inputs."""
    del subset
    # Fix: was `self.int32`, which is not an attribute of the model and would
    # raise AttributeError when called; all three inputs are tf.int32, matching
    # the dtypes used in get_synthetic_inputs.
    return [tf.int32, tf.int32, tf.int32]
def _fp16_variable_creator(next_creator, **kwargs):
  """Variable creator that backs fp16 variables with fp32 storage.

  When an fp16 variable is requested, the underlying variable is created in
  fp32 (with the initial value cast accordingly) and an fp16-cast view of it
  is returned. All other requests pass through unchanged.
  """
  requested_dtype = kwargs.get('dtype', None)
  init_value = kwargs.get('initial_value', None)
  # Infer the dtype from a concrete (non-callable) initial value when no
  # explicit dtype was requested.
  if requested_dtype is None and init_value is not None:
    if not callable(init_value):
      requested_dtype = init_value.dtype
  # Non-fp16 requests: delegate untouched.
  if requested_dtype != tf.float16:
    return next_creator(**kwargs)
  # fp16 request: create the variable in fp32, casting the initializer.
  if callable(init_value):
    fp32_init = lambda: tf.cast(init_value(), tf.float32)
  else:
    fp32_init = tf.cast(init_value, tf.float32)
  kwargs['dtype'] = tf.float32
  kwargs['initial_value'] = fp32_init
  fp32_var = next_creator(**kwargs)
  return tf.cast(fp32_var, dtype=tf.float16)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/googlenet_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Googlenet model configuration.
References:
Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich
Going deeper with convolutions
arXiv preprint arXiv:1409.4842 (2014)
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
models
import
model
class GooglenetModel(model.CNNModel):
  """GoogLeNet (Inception v1), Szegedy et al., arXiv:1409.4842."""

  def __init__(self, params=None):
    super(GooglenetModel, self).__init__('googlenet', 224, 32, 0.005,
                                         params=params)

  def add_inference(self, cnn):
    def inception_block(net, num_1x1, num_3x3_reduce, num_3x3,
                        num_5x5_reduce, num_5x5, num_pool_proj):
      # Four parallel branches, merged by net.inception_module: a 1x1 conv,
      # a 1x1->3x3 stack, a 1x1->5x5 stack, and a max-pool with projection.
      branches = [
          [('conv', num_1x1, 1, 1)],
          [('conv', num_3x3_reduce, 1, 1), ('conv', num_3x3, 3, 3)],
          [('conv', num_5x5_reduce, 1, 1), ('conv', num_5x5, 5, 5)],
          [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', num_pool_proj, 1, 1)],
      ]
      net.inception_module('incept_v1', branches)

    # Stem convolutions and pooling.
    cnn.conv(64, 7, 7, 2, 2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.conv(64, 1, 1)
    cnn.conv(192, 3, 3)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # First inception group.
    for spec in ((64, 96, 128, 16, 32, 32),
                 (128, 128, 192, 32, 96, 64)):
      inception_block(cnn, *spec)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # Second inception group.
    for spec in ((192, 96, 208, 16, 48, 64),
                 (160, 112, 224, 24, 64, 64),
                 (128, 128, 256, 24, 64, 64),
                 (112, 144, 288, 32, 64, 64),
                 (256, 160, 320, 32, 128, 128)):
      inception_block(cnn, *spec)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # Final inception group.
    for spec in ((256, 160, 320, 32, 128, 128),
                 (384, 192, 384, 48, 128, 128)):
      inception_block(cnn, *spec)
    # Global average pooling and flatten to a 1024-wide feature vector.
    cnn.apool(7, 7, 1, 1, mode='VALID')
    cnn.reshape([-1, 1024])
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/inception_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Inception model configuration.
Includes multiple models: inception3, inception4, inception-resnet2.
References:
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
Inception-v4, Inception-ResNet and the Impact of Residual Connections on
Learning
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich
Going Deeper with Convolutions
http://arxiv.org/pdf/1409.4842v1.pdf
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
Zbigniew Wojna
Rethinking the Inception Architecture for Computer Vision
arXiv preprint arXiv:1512.00567 (2015)
Inception v3 model: http://arxiv.org/abs/1512.00567
Inception v4 and Resnet V2 architectures: http://arxiv.org/abs/1602.07261
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
from
models
import
model
class Inceptionv3Model(model.CNNModel):
  """InceptionV3 (http://arxiv.org/abs/1512.00567).

  When constructed with auxiliary=True, an auxiliary classifier head is
  attached after the last 17x17 inception block.
  """

  def __init__(self, auxiliary=False, params=None):
    # Remembered here; consulted by add_inference to decide whether to build
    # the auxiliary head.
    self._auxiliary = auxiliary
    # Positional args: model name, image size (299), batch size (32),
    # learning rate (0.005) -- presumably matching model.CNNModel's
    # constructor ordering; confirm there.
    super(Inceptionv3Model, self).__init__('inception3', 299, 32, 0.005,
                                           params=params)

  def add_inference(self, cnn):
    """Appends the InceptionV3 tower to `cnn`'s current top layer.

    Each helper below appends one inception module; every entry of `cols` is
    one parallel branch of layer specs that cnn.inception_module merges.
    """
    def inception_v3_a(cnn, n):
      # Four-branch module; `n` is the width of the 1x1 conv following the
      # average-pool branch.
      cols = [[('conv', 64, 1, 1)], [('conv', 48, 1, 1), ('conv', 64, 5, 5)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', n, 1, 1)]]
      cnn.inception_module('incept_v3_a', cols)

    def inception_v3_b(cnn):
      # Spatial-reduction module: every branch ends with stride (2, 2) and
      # 'VALID' padding.
      cols = [[('conv', 384, 3, 3, 2, 2, 'VALID')],
              [('conv', 64, 1, 1),
               ('conv', 96, 3, 3),
               ('conv', 96, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_b', cols)

    def inception_v3_c(cnn, n):
      # Module built from 1x7 / 7x1 factorized convolutions; `n` is the width
      # of the intermediate factorized convs.
      cols = [[('conv', 192, 1, 1)],
              [('conv', n, 1, 1), ('conv', n, 1, 7), ('conv', 192, 7, 1)],
              [('conv', n, 1, 1), ('conv', n, 7, 1), ('conv', n, 1, 7),
               ('conv', n, 7, 1), ('conv', 192, 1, 7)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_c', cols)

    def inception_v3_d(cnn):
      # Spatial-reduction module (stride-2 'VALID' endings on all branches).
      cols = [[('conv', 192, 1, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 192, 7, 1),
               ('conv', 192, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_d', cols)

    def inception_v3_e(cnn, pooltype):
      # Expanded-filter-bank module. NOTE(review): ('share',) entries
      # presumably reuse layers from the preceding branch -- confirm against
      # cnn.inception_module in convnet_builder. `pooltype` selects max vs
      # average pooling for the pooling branch.
      cols = [[('conv', 320, 1, 1)], [('conv', 384, 1, 1), ('conv', 384, 1, 3)],
              [('share',), ('conv', 384, 3, 1)],
              [('conv', 448, 1, 1), ('conv', 384, 3, 3), ('conv', 384, 1, 3)],
              [('share',), ('share',), ('conv', 384, 3, 1)],
              [('mpool' if pooltype == 'max' else 'apool', 3, 3, 1, 1, 'SAME'),
               ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_e', cols)

    def incept_v3_aux(cnn):
      # Auxiliary classifier head, branched off the current top layer. Only
      # one auxiliary head may exist (asserted below).
      assert cnn.aux_top_layer is None
      cnn.aux_top_layer = cnn.top_layer
      cnn.aux_top_size = cnn.top_size
      with cnn.switch_to_aux_top_layer():
        cnn.apool(5, 5, 3, 3, mode='VALID')
        cnn.conv(128, 1, 1, mode='SAME')
        cnn.conv(768, 5, 5, mode='VALID', stddev=0.01)
        cnn.reshape([-1, 768])

    cnn.use_batch_norm = True
    # Stem: the trailing comments give the input spatial size of each layer.
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')  # 299 x 299 x 3
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')  # 149 x 149 x 32
    cnn.conv(64, 3, 3, 1, 1, mode='SAME')  # 147 x 147 x 64
    cnn.mpool(3, 3, 2, 2, mode='VALID')  # 147 x 147 x 64
    cnn.conv(80, 1, 1, 1, 1, mode='VALID')  # 73 x 73 x 80
    cnn.conv(192, 3, 3, 1, 1, mode='VALID')  # 71 x 71 x 192
    cnn.mpool(3, 3, 2, 2, 'VALID')  # 35 x 35 x 192
    inception_v3_a(cnn, 32)  # 35 x 35 x 256 mixed.
    inception_v3_a(cnn, 64)  # 35 x 35 x 288 mixed_1.
    inception_v3_a(cnn, 64)  # 35 x 35 x 288 mixed_2
    inception_v3_b(cnn)  # 17 x 17 x 768 mixed_3
    inception_v3_c(cnn, 128)  # 17 x 17 x 768 mixed_4
    inception_v3_c(cnn, 160)  # 17 x 17 x 768 mixed_5
    inception_v3_c(cnn, 160)  # 17 x 17 x 768 mixed_6
    inception_v3_c(cnn, 192)  # 17 x 17 x 768 mixed_7
    if self._auxiliary:
      incept_v3_aux(cnn)  # Auxillary Head logits
    inception_v3_d(cnn)  # 17 x 17 x 1280 mixed_8
    inception_v3_e(cnn, 'avg')  # 8 x 8 x 2048 mixed_9
    inception_v3_e(cnn, 'max')  # 8 x 8 x 2048 mixed_10
    cnn.apool(8, 8, 1, 1, 'VALID')  # 8 x 8 x 2048
    cnn.reshape([-1, 2048])  # 1 x 1 x 2048
# Stem functions
def inception_v4_sa(cnn):
  """Inception v4 stem block A: parallel 3x3 max-pool and strided 3x3 conv."""
  pool_branch = [('mpool', 3, 3, 2, 2, 'VALID')]
  conv_branch = [('conv', 96, 3, 3, 2, 2, 'VALID')]
  cnn.inception_module('incept_v4_sa', [pool_branch, conv_branch])
def inception_v4_sb(cnn):
  """Inception v4 stem block B: short conv branch plus a 7x1/1x7 factorized branch."""
  short_branch = [('conv', 64, 1, 1), ('conv', 96, 3, 3, 1, 1, 'VALID')]
  factorized_branch = [('conv', 64, 1, 1), ('conv', 64, 7, 1),
                       ('conv', 64, 1, 7), ('conv', 96, 3, 3, 1, 1, 'VALID')]
  cnn.inception_module('incept_v4_sb', [short_branch, factorized_branch])
def inception_v4_sc(cnn):
  """Inception v4 stem block C: strided 3x3 conv alongside a 3x3 max-pool."""
  branches = [
      [('conv', 192, 3, 3, 2, 2, 'VALID')],
      [('mpool', 3, 3, 2, 2, 'VALID')],
  ]
  cnn.inception_module('incept_v4_sc', branches)
# Reduction functions
def inception_v4_ra(cnn, k, l, m, n):
  """Inception v4 reduction block A with branch widths k, l, m and n."""
  pool_branch = [('mpool', 3, 3, 2, 2, 'VALID')]
  single_conv_branch = [('conv', n, 3, 3, 2, 2, 'VALID')]
  stacked_conv_branch = [('conv', k, 1, 1), ('conv', l, 3, 3),
                         ('conv', m, 3, 3, 2, 2, 'VALID')]
  cnn.inception_module(
      'incept_v4_ra', [pool_branch, single_conv_branch, stacked_conv_branch])
def inception_v4_rb(cnn):
  """Inception v4 reduction block B."""
  branches = []
  branches.append([('mpool', 3, 3, 2, 2, 'VALID')])
  branches.append([('conv', 192, 1, 1), ('conv', 192, 3, 3, 2, 2, 'VALID')])
  branches.append([('conv', 256, 1, 1), ('conv', 256, 1, 7),
                   ('conv', 320, 7, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')])
  cnn.inception_module('incept_v4_rb', branches)
class Inceptionv4Model(model.CNNModel):
  """Inceptionv4 (http://arxiv.org/abs/1602.07261)."""

  def __init__(self, params=None):
    # Positional args: model name, image size (299), batch size (32),
    # learning rate (0.005) -- presumably matching model.CNNModel's
    # constructor ordering; confirm there.
    super(Inceptionv4Model, self).__init__('inception4', 299, 32, 0.005,
                                           params=params)

  def add_inference(self, cnn):
    """Appends the Inception-v4 tower to `cnn`'s current top layer.

    Each helper below appends one inception module; every entry of `cols` is
    one parallel branch of layer specs that cnn.inception_module merges.
    """
    def inception_v4_a(cnn):
      # 4-branch "A" block.
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 96, 1, 1)],
              [('conv', 96, 1, 1)], [('conv', 64, 1, 1), ('conv', 96, 3, 3)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)]]
      cnn.inception_module('incept_v4_a', cols)

    def inception_v4_b(cnn):
      # 4-branch "B" block with 1x7 / 7x1 factorized convolutions.
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 128, 1, 1)],
              [('conv', 384, 1, 1)],
              [('conv', 192, 1, 1), ('conv', 224, 1, 7), ('conv', 256, 7, 1)],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 224, 7, 1),
               ('conv', 224, 1, 7), ('conv', 256, 7, 1)]]
      cnn.inception_module('incept_v4_b', cols)

    def inception_v4_c(cnn):
      # "C" block. NOTE(review): ('share',) entries presumably reuse layers
      # from the preceding branch -- confirm against cnn.inception_module in
      # convnet_builder.
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 256, 1, 1)],
              [('conv', 256, 1, 1)], [('conv', 384, 1, 1), ('conv', 256, 1, 3)],
              [('share',), ('conv', 256, 3, 1)],
              [('conv', 384, 1, 1), ('conv', 448, 1, 3), ('conv', 512, 3, 1),
               ('conv', 256, 3, 1)],
              [('share',), ('share',), ('share',), ('conv', 256, 1, 3)]]
      cnn.inception_module('incept_v4_c', cols)

    cnn.use_batch_norm = True
    # Stem convolutions, then the three module-level stem blocks.
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')
    cnn.conv(64, 3, 3)
    inception_v4_sa(cnn)
    inception_v4_sb(cnn)
    inception_v4_sc(cnn)
    # 4 x A blocks, reduction, 7 x B blocks, reduction, 3 x C blocks.
    for _ in xrange(4):
      inception_v4_a(cnn)
    inception_v4_ra(cnn, 192, 224, 256, 384)
    for _ in xrange(7):
      inception_v4_b(cnn)
    inception_v4_rb(cnn)
    for _ in xrange(3):
      inception_v4_c(cnn)
    # Global spatial average, then dropout before the classifier.
    cnn.spatial_mean()
    cnn.dropout(0.8)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/lenet_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Lenet model configuration.
References:
LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner
Gradient-based learning applied to document recognition
Proceedings of the IEEE (1998)
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
models
import
model
class Lenet5Model(model.CNNModel):
  """Lenet5 (LeCun et al., "Gradient-based learning applied to document
  recognition", Proceedings of the IEEE, 1998).
  """

  def __init__(self, params=None):
    # Positional args: model name, image size (28), batch size (32),
    # learning rate (0.005) -- presumably matching model.CNNModel's
    # constructor ordering; confirm there.
    super(Lenet5Model, self).__init__('lenet5', 28, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Appends the LeNet-5 layers to `cnn`'s current top layer."""
    # Note: This matches TF's MNIST tutorial model
    cnn.conv(32, 5, 5)
    cnn.mpool(2, 2)
    cnn.conv(64, 5, 5)
    cnn.mpool(2, 2)
    # Two rounds of 2x2 pooling take the 28x28 input down to 7x7 maps with
    # 64 channels, hence the 64 * 7 * 7 flatten below.
    cnn.reshape([-1, 64 * 7 * 7])
    cnn.affine(512)
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment