dcuai / dlexamples, commit a32ffa95
authored Feb 03, 2023 by qianyj
parent e286da17

update TensorFlow2x test method

Changes: 268 files. Showing 20 changed files with 0 additions and 6183 deletions (+0, -6183).
All 20 files shown are under TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/:

benchmark_cnn_distributed_test.py           +0  -493
benchmark_cnn_distributed_test_runner.py    +0  -122
benchmark_cnn_test.py                       +0  -1493
cnn_util.py                                 +0  -253
cnn_util_test.py                            +0  -129
coco_metric.py                              +0  -198
constants.py                                +0  -67
convnet_builder.py                          +0  -498
datasets.py                                 +0  -251
flags.py                                    +0  -93
leading_indicators_test.py                  +0  -1003
mlperf.py                                   +0  -260
mlperf_test.py                              +0  -189
models/alexnet_model.py                     +0  -93
models/densenet_model.py                    +0  -100
models/experimental/deepspeech.py           +0  -449
models/experimental/official_ncf_model.py   +0  -172
models/googlenet_model.py                   +0  -63
models/inception_model.py                   +0  -213
models/lenet_model.py                       +0  -44
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_distributed_test.py
deleted, 100644 → 0 (contents as of parent e286da17):
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests running benchmark_cnn in distributed mode.

This is done by spawning one process per task. Each process runs
benchmark_cnn_distributed_test_runner.py.

The output for each process is written to disk and can be viewed to debug tests.
See get_test_output_dir() in platforms/default/util.py for more info.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple
import os
import subprocess
import time
import unittest

from absl import flags as absl_flags
import portpicker
import six
import tensorflow.compat.v1 as tf

import flags
import test_util
from platforms import util as platforms_util

FLAGS = absl_flags.FLAGS


def _convert_params_to_flags_list(params):
  """Converts Params to a list of flags. Skips default-valued parameters.

  E.g., converts
    benchmark_cnn.make_params(batch_size=32, model='resnet50')
  to
    ['--batch_size=32', '--model=resnet50']

  Args:
    params: Params for BenchmarkCNN.
  Returns:
    A list of flags.
  """
  return [
      '--%s=%s' % (k, str(v)) for k, v in six.iteritems(params._asdict())
      if v != flags.param_specs[k].default_value
  ]


# When outputting a process's output in the log, maximum number of characters
# to output. The log system does not allow us to output more than this in a
# single log message, but this limit is also useful to avoid the logs from
# becoming too large (the full process output is written to disk).
MAX_OUTPUT_CHARS = 15000


# A process. name is a string identifying the process in logs. stdout and
# stderr are file objects of the process's stdout and stderr, respectively.
_ProcessInfo = namedtuple('_ProcessInfo', ['name', 'popen', 'stdout', 'stderr'])


def _create_task_process(job_name, task_index, args, env, output_dir):
  """Creates a process for a single task for benchmark_cnn.

  Args:
    job_name: 'worker' or 'ps' or ''. Empty string used for non-distributed
      mode.
    task_index: The index of the task within the cluster.
    args: A list of arguments to pass to the task. This function additionally
      sets --task_index and --job_name
    env: The environment to use for the task.
    output_dir: Where to place the output files, storing the task's stdout and
      stderr.
  Returns:
    A _ProcessInfo namedtuple of the running process. The stdout and stderr
    fields of this tuple must be closed by the caller once the process ends.
  """
  args = args[:]
  args += ['--task_index=%s' % task_index, '--job_name=%s' % job_name]
  name_prefix = job_name or 'local'
  process_name = '%s_%s' % (name_prefix, task_index)
  tf.logging.info('Spawning %s process: %s' % (process_name, ' '.join(args)))
  stdout_filename = os.path.join(output_dir, '%s_stdout.txt' % process_name)
  stderr_filename = os.path.join(output_dir, '%s_stderr.txt' % process_name)
  stdout_file = open(stdout_filename, 'w+')
  stderr_file = open(stderr_filename, 'w+')
  popen = subprocess.Popen(
      args, stdout=stdout_file, stderr=stderr_file, env=env)
  return _ProcessInfo(process_name, popen, stdout_file, stderr_file)


def _wait_for_processes(wait_processes, kill_processes):
  """Waits until all `wait_processes` finish, then kills `kill_processes`.

  Fails an assert if a process in `wait_processes` finishes unsuccessfully.
  The processes in `kill_processes` are assumed to never finish so they are
  killed.

  Args:
    wait_processes: A list of _ProcessInfo tuples. This function will wait
      for each to finish.
    kill_processes: A list of _ProcessInfo tuples. Each will be killed once
      every process in `wait_processes` is finished.
  Returns:
    A list of strings, each which is a string of the stdout of a wait process.
  """
  wait_process_stdouts = [None] * len(wait_processes)
  finished_wait_processes = set()
  while len(finished_wait_processes) < len(wait_processes):
    for i, wait_process in enumerate(wait_processes):
      if i in finished_wait_processes:
        continue
      ret_code = wait_process.popen.poll()
      if ret_code is None:
        continue
      tf.logging.info('{} finished'.format(wait_process.name))
      wait_process.stdout.seek(0)
      wait_process_stdouts[i] = wait_process.stdout.read()
      tf.logging.info('stdout for {} (last {} chars): {}\n'.format(
          wait_process.name, MAX_OUTPUT_CHARS,
          wait_process_stdouts[i][-MAX_OUTPUT_CHARS:]))
      wait_process.stderr.seek(0)
      tf.logging.info('stderr for {} (last {} chars): {}\n'.format(
          wait_process.name, MAX_OUTPUT_CHARS,
          wait_process.stderr.read()[-MAX_OUTPUT_CHARS:]))
      assert ret_code == 0, 'Process failed with return code %d' % ret_code
      finished_wait_processes.add(i)
    for kill_process in kill_processes:
      ret_code = kill_process.popen.poll()
      # kill processes should not end until we kill them.
      assert ret_code is None, 'Process returned early with code %d' % ret_code
    time.sleep(0.25)
  tf.logging.info('All wait processes finished')
  for i, kill_process in enumerate(kill_processes):
    # Kill each kill process.
    kill_process.popen.kill()
    kill_process.popen.wait()
    kill_process.stdout.seek(0)
    tf.logging.info('stdout for {} (last {} chars): {}\n'.format(
        kill_process.name, MAX_OUTPUT_CHARS,
        kill_process.stdout.read()[-MAX_OUTPUT_CHARS:]))
    kill_process.stderr.seek(0)
    tf.logging.info('stderr for {} (last {} chars): {}\n'.format(
        kill_process.name, MAX_OUTPUT_CHARS,
        kill_process.stderr.read()[-MAX_OUTPUT_CHARS:]))
  return wait_process_stdouts


def _spawn_benchmark_processes(output_dir_path, num_workers, num_ps,
                               num_controllers, params):
  """Run training or evaluation in spawned processes.

  Runs locally if num_workers == 1, num_ps == 0, and num_controllers == 0,
  otherwise runs in distributed mode. In either case, one process is spawned
  per worker and ps. Waits for training/evaluation to finish before returning.

  Args:
    output_dir_path: Relative path where stdout and stderr files will be
      placed.
    num_workers: Number of workers to spawn.
    num_ps: Number of ps processes to spawn.
    num_controllers: Number of controller processes to spawn (must be 0 or 1).
    params: Params for BenchmarkCNN in each subprocess.
  Returns:
    A list output_list of outputs from all processes that output the
    images/sec and accuracy. This process is the controller host in
    distributed_all_reduce, and the workers otherwise. output_list[i] is a
    list of lines from the ith worker's stdout.
  """
  run_distributed = num_workers != 1 or num_ps != 0 or num_controllers != 0
  if params.variable_update == 'distributed_all_reduce':
    assert num_controllers == 1 or not run_distributed
    assert num_ps == 0
  else:
    assert num_controllers == 0
  output_base_dir = platforms_util.get_test_output_dir()
  output_dir = os.path.join(output_base_dir, output_dir_path)
  os.makedirs(output_dir)
  tf.logging.info('Outputs of processes will be outputted to: %s' % output_dir)

  args = platforms_util.get_command_to_run_python_module(
      'benchmark_cnn_distributed_test_runner')
  args += _convert_params_to_flags_list(params)
  if run_distributed:
    worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
    ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
    controller_ports = [
        portpicker.pick_unused_port() for _ in range(num_controllers)
    ]
    # The numerator is 0.7 instead of 1 to leave some memory for the Cuda
    # runtime, etc.
    gpu_memory_frac = 0.7 / num_workers
    args += [
        '--gpu_memory_frac_for_testing=%f' % gpu_memory_frac,
        '--worker_hosts=' + ','.join('localhost:%d' % p for p in worker_ports)
    ]
    if num_ps > 0:
      ps_hosts_str = ','.join('localhost:%d' % p for p in ps_ports)
      args.append('--ps_hosts=' + ps_hosts_str)
    else:
      controller_host_str = ','.join(
          'localhost:%d' % p for p in controller_ports)
      args.append('--controller_host=' + controller_host_str)
  env = os.environ.copy()
  # Allow stdout to be viewed before the process ends.
  env['PYTHONUNBUFFERED'] = '1'

  worker_processes = []
  ps_processes = []
  controller_processes = []
  try:
    for i in range(num_workers):
      job_name = 'worker' if run_distributed else ''
      process = _create_task_process(job_name, i, args, env, output_dir)
      worker_processes.append(process)
    # Don't let ps or controller processes use the gpu.
    env['CUDA_VISIBLE_DEVICES'] = ''
    for i in range(num_ps):
      process = _create_task_process('ps', i, args, env, output_dir)
      ps_processes.append(process)
    for i in range(num_controllers):
      process = _create_task_process('controller', i, args, env, output_dir)
      controller_processes.append(process)
    # If all distributed all reduce mode is being used, the controller process
    # finishes and the worker processes block forever. Otherwise, the worker
    # processes finish and the ps processes block forever. We set
    # wait_processes and kill_processes accordingly.
    if controller_processes:
      wait_processes = controller_processes
      kill_processes = worker_processes
    else:
      wait_processes = worker_processes
      kill_processes = ps_processes
    outputs = _wait_for_processes(wait_processes, kill_processes)
  finally:
    for process in worker_processes + ps_processes + controller_processes:
      try:
        process.popen.kill()
      except OSError:
        pass  # It's OK (and expected) if the process already exited.
      process.stdout.close()
      process.stderr.close()
  return [output.splitlines() for output in outputs]


# When this test class is run, a method will fail about 0.3% of the time with a
# gRPC error. It is not clear why this occurs.
# TODO(reedwm): Fix this test class.
class TfCnnBenchmarksDistributedTest(tf.test.TestCase):
  """Tests running benchmark_cnn in distributed mode."""

  # We cannot check for a GPU via tf.test.is_gpu_available() before the tests
  # in this class because it allocates all the GPU memory which would cause the
  # spawned processes to run out of GPU memory.

  def _test_distributed(self,
                        test_name,
                        num_workers,
                        num_ps,
                        params,
                        num_controllers=0,
                        check_output_values=False,
                        skip=None):
    # TODO(reedwm): check_output_values should default to True and be enabled
    # on every test. See the TODO in benchmark_cnn_test.py.
    def run_fn(run_type, inner_params):
      output_dir_path = os.path.join(test_name, run_type)
      if run_type == 'Evaluation':
        # Distributed evaluation is not supported, so we use a single process.
        # We still must spawn another process, because if we evaluate in the
        # current process, it would allocate the GPU memory causing future test
        # methods to fail.
        if inner_params.variable_update == 'distributed_replicated':
          inner_params = inner_params._replace(variable_update='replicated')
        return _spawn_benchmark_processes(
            output_dir_path, num_workers=1, num_ps=0, num_controllers=0,
            params=inner_params)
      else:
        return _spawn_benchmark_processes(output_dir_path, num_workers, num_ps,
                                          num_controllers, inner_params)

    return test_util.train_and_eval(self, run_fn, params,
                                    check_output_values=check_output_values,
                                    skip=skip)

  def testParameterServer(self):
    test_name = 'testParameterServer'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 2, 2, params)

  def testParameterServerStaged(self):
    test_name = 'testParameterServerStaged'
    params = test_util.get_params(test_name)._replace(staged_vars=True)
    self._test_distributed(test_name, 2, 2, params)

  def testReplicated(self):
    test_name = 'testReplicated'
    params = test_util.get_params(test_name)._replace(
        variable_update='distributed_replicated')
    self._test_distributed(test_name, 2, 2, params)

  def testAllReducePsgpu(self):
    test_name = 'testAllReducePsgpu'
    flags_dict = test_util.get_params(test_name)._replace(
        variable_update='distributed_all_reduce',
        all_reduce_spec='psgpu#4')
    self._test_distributed(test_name, 2, 0, flags_dict, num_controllers=1)

  def testAllReducePscpuXring(self):
    test_name = 'testAllReducePscpuXring'
    flags_dict = test_util.get_params(test_name)._replace(
        variable_update='distributed_all_reduce',
        all_reduce_spec='pscpu:2k:xring')
    self._test_distributed(test_name, 2, 0, flags_dict, num_controllers=1)

  def testForwardOnly(self):
    test_name = 'testForwardOnly'
    params = test_util.get_params(test_name)._replace(forward_only=True)
    # Evaluation is not supported with --forward_only, so we set skip='eval'.
    self._test_distributed(test_name, 2, 2, params, skip='eval')

  def testSingleWorkerAndPs(self):
    test_name = 'testSingleWorkerAndPs'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 1, 1, params)

  def testThreeWorkersAndPses(self):
    test_name = 'testThreeWorkersAndPses'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 3, 3, params)

  def testOneWorkerThreePses(self):
    test_name = 'testOneWorkerThreePses'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 1, 3, params)

  def testThreeWorkersOnePs(self):
    test_name = 'testThreeWorkersOnePs'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 3, 1, params)

  def testNoPrintTrainingAccuracy(self):
    test_name = 'testNoPrintTrainingAccuracy'
    params = test_util.get_params(test_name)._replace(
        print_training_accuracy=False)
    self._test_distributed(test_name, 2, 2, params)

  def testRmspropParameterServer(self):
    test_name = 'testRmspropParameterServer'
    params = test_util.get_params(test_name)._replace(optimizer='rmsprop')
    self._test_distributed(test_name, 2, 2, params)

  def testMomentumReplicated(self):
    test_name = 'testMomentumReplicated'
    params = test_util.get_params(test_name)._replace(
        optimizer='momentum', variable_update='distributed_replicated')
    self._test_distributed(test_name, 2, 2, params)

  def testNoCrossReplicaSyncParameterServerStaged(self):
    test_name = 'testNoCrossReplicaSyncParameterServerStaged'
    params = test_util.get_params(test_name)._replace(
        staged_vars=True, cross_replica_sync=False)
    self._test_distributed(test_name, 2, 2, params)

  def testSingleGpu(self):
    test_name = 'testSingleGpu'
    params = test_util.get_params(test_name)._replace(num_gpus=1)
    self._test_distributed(test_name, 2, 2, params)

  def testBatchGroupSize(self):
    test_name = 'testBatchGroupSize'
    params = test_util.get_params(test_name)._replace(
        batch_group_size=4, num_batches=100, num_warmup_batches=5)
    self._test_distributed(test_name, 2, 2, params)

  def testFp16WithFp32Vars(self):
    test_name = 'testFp16WithFp32Vars'
    params = test_util.get_params(test_name)._replace(
        use_fp16=True, fp16_vars=False)
    self._test_distributed(test_name, 2, 2, params)

  def testFp16WithFp16Vars(self):
    test_name = 'testFp16WithFp16Vars'
    params = test_util.get_params(test_name)._replace(
        use_fp16=True, fp16_vars=True, fp16_loss_scale=1.)
    self._test_distributed(test_name, 2, 2, params)

  def testFp16Replicated(self):
    test_name = 'testFp16Replicated'
    params = test_util.get_params(test_name)._replace(
        use_fp16=True, variable_update='distributed_replicated')
    self._test_distributed(test_name, 2, 2, params)

  @unittest.skip('b/147310862: Fails for unknown reason')
  def testReplicatedRealData(self):
    test_name = 'testReplicatedRealData'
    imagenet_dir = os.path.join(platforms_util.get_test_data_dir(),
                                'fake_tf_record_data')
    params = test_util.get_params(test_name)._replace(
        variable_update='distributed_replicated',
        data_dir=imagenet_dir,
        data_name='imagenet')
    self._test_distributed(test_name, 2, 2, params)


class DistributedVariableUpdateTest(tf.test.TestCase):
  """Tests that variables are updated correctly in distributed mode."""

  def _test_variable_update(self,
                            test_name,
                            num_workers,
                            num_ps,
                            params,
                            num_controllers=0):
    """Tests variables are updated correctly when the given params are used."""
    output_dir_path = os.path.join(test_name, 'variable_update')
    logs = _spawn_benchmark_processes(output_dir_path, num_workers, num_ps,
                                      num_controllers, params)
    actual_losses = []
    for worker_logs in logs:
      outputs = test_util.get_training_outputs_from_logs(
          worker_logs, params.print_training_accuracy)
      actual_losses.append([x.loss for x in outputs])

    inputs = test_util.get_fake_var_update_inputs()
    expected_losses = test_util.TestCNNModel().manually_compute_losses(
        inputs, num_workers, params)
    if params.variable_update == 'distributed_all_reduce':
      # In distributed all reduce, each step, the controller outputs the
      # average of the loss from each worker. So we modify expected losses
      # accordingly. E.g, we change [[1, 2], [4, 5]] to [[2.5, 3.5]]
      expected_losses = [[
          sum(losses) / num_workers for losses in zip(*expected_losses)
      ]]
    rtol = 3e-2 if params.use_fp16 else 1e-5
    for worker_actual_losses, worker_expected_losses in zip(
        actual_losses, expected_losses):
      self.assertAllClose(worker_actual_losses[:len(worker_expected_losses)],
                          worker_expected_losses, rtol=rtol, atol=0.)

  def _test_variable_updates(self, test_name, params):
    """Tests variables are updated correctly with various variable updates."""

    # Unfortunately, distributed parameter server is non-deterministic with
    # multiple workers, because one worker may write to a variable before
    # another worker reads it. This probably does not harm training, but it
    # does mean we cannot easily test that case. So, we use one worker.
    self._test_variable_update(
        test_name + '_ps', num_workers=1, num_ps=2, num_controllers=0,
        params=params._replace(variable_update='parameter_server'))
    self._test_variable_update(
        test_name + '_rep', num_workers=2, num_ps=1, num_controllers=0,
        params=params._replace(variable_update='distributed_replicated'))
    self._test_variable_update(
        test_name + '_allreduce', num_workers=2, num_ps=0, num_controllers=1,
        params=params._replace(variable_update='distributed_all_reduce',
                               all_reduce_spec='psgpu#%d' % params.num_gpus))

  def testVarUpdateDefault(self):
    params = test_util.get_var_update_params()
    self._test_variable_updates('testVarUpdateDefault', params)

  def testVarUpdateCpuAsLocalParamDevice(self):
    params = test_util.get_var_update_params()._replace(
        local_parameter_device='cpu')
    self._test_variable_updates('testVarUpdateCpuAsLocalParamDevice', params)

  def testVarUpdateFp16(self):
    params = test_util.get_var_update_params()._replace(use_fp16=True)
    self._test_variable_updates('testVarUpdateFp16', params)

  def testVarUpdateResourceVars(self):
    params = test_util.get_var_update_params()._replace(use_resource_vars=True)
    self._test_variable_updates('testVarUpdateResourceVars', params)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
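A note on _convert_params_to_flags_list above: it serializes only the non-default fields of a Params namedtuple into --name=value strings, which are then appended to the spawned runner's command line. A minimal standalone sketch of the same idea, with a hypothetical Params type and default table standing in for benchmark_cnn's real param_specs:

from collections import namedtuple

# Hypothetical stand-ins for benchmark_cnn's Params and flags.param_specs.
Params = namedtuple('Params', ['batch_size', 'model'])
DEFAULT_VALUES = {'batch_size': 32, 'model': 'trivial'}


def to_flags_list(params):
  # Emit --name=value only for fields that differ from their defaults,
  # mirroring _convert_params_to_flags_list.
  return ['--%s=%s' % (k, str(v)) for k, v in params._asdict().items()
          if v != DEFAULT_VALUES[k]]


print(to_flags_list(Params(batch_size=64, model='trivial')))
# prints ['--batch_size=64']: the default-valued model field is skipped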
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_distributed_test_runner.py
deleted, 100644 → 0 (contents as of parent e286da17):
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Used to run benchmark_cnn for distributed tests.

In distributed tests, we spawn processes to run tf_cnn_benchmark tasks. We
could directly spawn tf_cnn_benchmark processes, but we want some added
functionality, such as being able to inject custom images during training. So
instead, this file is spawned as a Python process, which supports the added
functionality.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import flags as absl_flags
import numpy as np
import tensorflow.compat.v1 as tf

import benchmark_cnn
import flags
import preprocessing
import test_util

absl_flags.DEFINE_string('fake_input', 'none',
                         """What fake input to inject into benchmark_cnn. This
                         is ignored if --model=test_model.
                         Options are:
                         none: Do not inject any fake input.
                         zeros_and_ones: Half the images will be all 0s with
                         a label of 0. Half the images will be all 1s with a
                         label of 1.""")

flags.define_flags()
FLAGS = flags.FLAGS


def get_test_image_preprocessor(batch_size, params):
  """Returns the preprocessing.TestImagePreprocessor that should be injected.

  Returns None if no preprocessor should be injected.

  Args:
    batch_size: The batch size across all GPUs.
    params: BenchmarkCNN's parameters.
  Returns:
    Returns the preprocessing.TestImagePreprocessor that should be injected.
  Raises:
    ValueError: Flag --fake_input is an invalid value.
  """
  if FLAGS.fake_input == 'none':
    return None
  elif FLAGS.fake_input == 'zeros_and_ones':
    half_batch_size = batch_size // 2
    images = np.zeros((batch_size, 227, 227, 3), dtype=np.float32)
    images[half_batch_size:, :, :, :] = 1
    labels = np.array([0] * half_batch_size + [1] * half_batch_size,
                      dtype=np.int32)
    preprocessor = preprocessing.TestImagePreprocessor(
        batch_size, [227, 227, 3], params.num_gpus,
        benchmark_cnn.get_data_type(params))
    preprocessor.set_fake_data(images, labels)
    preprocessor.expected_subset = 'validation' if params.eval else 'train'
    return preprocessor
  else:
    raise ValueError('Invalid --fake_input: %s' % FLAGS.fake_input)


def run_with_real_model(params):
  """Runs tf_cnn_benchmarks with a real model."""
  bench = benchmark_cnn.BenchmarkCNN(params)
  bench.print_info()
  preprocessor = get_test_image_preprocessor(bench.batch_size, params)
  if preprocessor is not None:
    # The test image preprocessor requires queue runners. Since this file is
    # used for testing, it is OK to access protected members.
    # pylint: disable=protected-access
    bench.dataset._queue_runner_required = True
    # pylint: enable=protected-access
    bench.input_preprocessor = preprocessor
  bench.run()


def run_with_test_model(params):
  """Runs tf_cnn_benchmarks with a test model."""
  model = test_util.TestCNNModel()
  inputs = test_util.get_fake_var_update_inputs()
  with test_util.monkey_patch(benchmark_cnn,
                              LOSS_AND_ACCURACY_DIGITS_TO_SHOW=15):
    bench = benchmark_cnn.BenchmarkCNN(params,
                                       dataset=test_util.TestDataSet(),
                                       model=model)
    # The test model does not use labels when computing loss, so the label
    # values do not matter as long as it's the right shape.
    labels = np.array([1] * inputs.shape[0])
    bench.input_preprocessor.set_fake_data(inputs, labels)
    bench.run()


def main(_):
  params = benchmark_cnn.make_params_from_flags()
  params = benchmark_cnn.setup(params)
  if params.model == 'test_model':
    run_with_test_model(params)
  else:
    run_with_real_model(params)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.app.run()
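The tests in benchmark_cnn_distributed_test.py launch this runner as subprocesses rather than importing it, so each task gets its own process and GPU memory fraction. As a rough illustration, a hypothetical hand-rolled single-task launch (the non-distributed path of _spawn_benchmark_processes) might look like the sketch below; the real tests instead build the command via platforms_util.get_command_to_run_python_module() and pick free ports with portpicker:

import os
import subprocess

# Hypothetical local launch of the runner above with the test model.
env = dict(os.environ, PYTHONUNBUFFERED='1')  # let stdout be read early
args = [
    'python', 'benchmark_cnn_distributed_test_runner.py',
    '--model=test_model',  # exercises run_with_test_model()
    '--num_batches=4',
    '--job_name=',         # empty job name selects local, non-distributed mode
    '--task_index=0',
]
proc = subprocess.Popen(args, env=env)
proc.wait()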
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_test.py
deleted, 100644 → 0 (contents as of parent e286da17):
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for benchmark_cnn."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
glob
import
os
import
re
import
unittest
import
mock
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
from
google.protobuf
import
text_format
from
tensorflow.core.framework
import
step_stats_pb2
from
tensorflow.core.profiler
import
tfprof_log_pb2
from
tensorflow.python.platform
import
test
import
benchmark_cnn
import
datasets
import
flags
import
preprocessing
import
test_util
import
variable_mgr_util
from
platforms
import
util
as
platforms_util
def
_check_has_gpu
():
if
not
test
.
is_gpu_available
(
cuda_only
=
True
):
raise
ValueError
(
"""You have asked to run part or all of this on GPU, but it appears
that no GPU is available. If your machine has GPUs it is possible you
do not have a version of TensorFlow with GPU support. To build with GPU
support, add --config=cuda to the build flags.
\n
"""
)
class
TfCnnBenchmarksModelTest
(
tf
.
test
.
TestCase
):
"""Tests which are run with multiple models."""
def
setUp
(
self
):
super
(
TfCnnBenchmarksModelTest
,
self
).
setUp
()
benchmark_cnn
.
setup
(
benchmark_cnn
.
make_params
())
def
get_model_name
(
self
):
return
None
# Return true to run tests that don't need to be run on every model.
# This should be done for one or two cheap models.
def
extended_tests
(
self
):
return
False
# Return false to suppress actually running the model; this is useful
# for tests that are large.
def
model_execution_test
(
self
):
return
False
# Return false to suppress actually saving and loading the model.
def
model_save_load_test
(
self
):
return
False
def
testSaveLoadModel
(
self
):
_check_has_gpu
()
if
not
self
.
get_model_name
()
or
not
self
.
model_save_load_test
():
return
params
=
benchmark_cnn
.
make_params
(
model
=
self
.
get_model_name
(),
num_batches
=
1
,
num_intra_threads
=
0
,
num_inter_threads
=
0
,
distortions
=
False
,
batch_size
=
2
,
variable_update
=
'replicated'
,
num_warmup_batches
=
0
,
num_gpus
=
2
,
train_dir
=
test_util
.
get_temp_dir
(
'testSaveLoadModel_'
+
self
.
get_model_name
()))
# Run one batch and save the model.
# Note that this uses a non-test session.
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
run
()
self
.
assertEqual
(
bench
.
init_global_step
,
0
)
# Clear the default graph.
tf
.
reset_default_graph
()
# Test if checkpoint had been saved.
ckpt
=
tf
.
train
.
get_checkpoint_state
(
params
.
train_dir
)
match
=
re
.
match
(
os
.
path
.
join
(
params
.
train_dir
,
r
'model.ckpt-(\d+).index'
),
ckpt
.
model_checkpoint_path
+
'.index'
)
self
.
assertTrue
(
match
)
self
.
assertGreaterEqual
(
int
(
match
.
group
(
1
)),
params
.
num_batches
)
params
=
params
.
_replace
(
num_batches
=
2
)
# Reload the model
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
run
()
# Check if global step has been restored.
self
.
assertNotEqual
(
bench
.
init_global_step
,
0
)
ckpt
=
tf
.
train
.
get_checkpoint_state
(
params
.
train_dir
)
match
=
re
.
match
(
os
.
path
.
join
(
params
.
train_dir
,
r
'model.ckpt-(\d+).index'
),
ckpt
.
model_checkpoint_path
+
'.index'
)
self
.
assertTrue
(
match
)
self
.
assertGreaterEqual
(
int
(
match
.
group
(
1
)),
params
.
num_batches
)
# Check that the batch norm moving averages are restored from checkpoints
with
tf
.
Graph
().
as_default
():
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
_build_model
()
saver
=
tf
.
train
.
Saver
(
bench
.
variable_mgr
.
savable_variables
())
with
tf
.
Session
(
config
=
benchmark_cnn
.
create_config_proto
(
params
))
as
sess
:
benchmark_cnn
.
load_checkpoint
(
saver
,
sess
,
params
.
train_dir
)
sess
.
run
(
bench
.
variable_mgr
.
get_post_init_ops
())
bn_moving_vars
=
[
v
for
v
in
tf
.
global_variables
()
if
'/batchnorm'
in
v
.
name
and
'/moving'
in
v
.
name
]
self
.
assertGreater
(
len
(
bn_moving_vars
),
0
)
for
moving_var
in
bn_moving_vars
:
moving_var_value
=
sess
.
run
(
moving_var
)
# Check that the moving means and moving variances have been restored
# by asserting they are not their default values of 0 and 1,
# respectively
if
'/moving_mean'
in
moving_var
.
name
:
self
.
assertFalse
(
np
.
array_equal
(
moving_var_value
,
np
.
zeros
(
moving_var_value
.
shape
,
moving_var_value
.
dtype
)))
else
:
self
.
assertIn
(
'/moving_variance'
,
moving_var
.
name
)
self
.
assertFalse
(
np
.
array_equal
(
moving_var_value
,
np
.
ones
(
moving_var_value
.
shape
,
moving_var_value
.
dtype
)))
def
testModel
(
self
):
_check_has_gpu
()
if
not
self
.
get_model_name
()
or
not
self
.
model_execution_test
():
return
params
=
benchmark_cnn
.
make_params
(
model
=
self
.
get_model_name
(),
num_batches
=
1
,
num_intra_threads
=
1
,
num_inter_threads
=
12
,
batch_size
=
2
,
distortions
=
False
)
# Run this one; note that this uses a non-test session.
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
run
()
def
testSendRecvVariables
(
self
):
self
.
_testVariables
(
'parameter_server'
)
if
self
.
extended_tests
():
self
.
_testVariables
(
'parameter_server'
,
local_parameter_device
=
'CPU'
)
self
.
_testVariables
(
'parameter_server'
,
optimizer
=
'sgd'
)
def
testReplicatedVariables
(
self
):
self
.
_testVariables
(
'replicated'
)
if
self
.
extended_tests
():
self
.
_testVariables
(
'replicated'
,
all_reduce_spec
=
None
)
self
.
_testVariables
(
'replicated'
,
use_fp16
=
True
,
fp16_vars
=
False
)
self
.
_testVariables
(
'replicated'
,
all_reduce_spec
=
None
,
use_fp16
=
True
,
fp16_vars
=
False
,
fp16_enable_auto_loss_scale
=
True
,
fp16_inc_loss_scale_every_n
=
4
)
def
testIndependentVariables
(
self
):
self
.
_testVariables
(
'independent'
)
self
.
_testVariables
(
'independent'
,
all_reduce_spec
=
None
,
use_fp16
=
True
,
fp16_vars
=
False
,
fp16_enable_auto_loss_scale
=
True
,
fp16_inc_loss_scale_every_n
=
4
)
def
testSummaryVerbosity
(
self
):
self
.
_testVariables
(
'parameter_server'
,
summary_verbosity
=
1
)
if
self
.
extended_tests
():
self
.
_testVariables
(
'parameter_server'
,
summary_verbosity
=
2
)
self
.
_testVariables
(
'parameter_server'
,
summary_verbosity
=
3
)
def
testStagedVariables
(
self
):
self
.
_testVariables
(
'parameter_server'
,
staged_vars
=
True
)
if
self
.
extended_tests
():
self
.
_testVariables
(
'parameter_server'
,
staged_vars
=
True
,
local_parameter_device
=
'CPU'
)
self
.
_testVariables
(
'parameter_server'
,
staged_vars
=
True
,
use_fp16
=
True
,
fp16_vars
=
True
)
def
_assert_correct_var_type
(
self
,
var
,
params
):
if
'gpu_cached_inputs'
not
in
var
.
name
:
if
params
.
use_fp16
and
params
.
fp16_vars
and
'batchnorm'
not
in
var
.
name
:
expected_type
=
tf
.
float16
else
:
expected_type
=
tf
.
float32
self
.
assertEqual
(
var
.
dtype
.
base_dtype
,
expected_type
)
def
_testVariables
(
self
,
variable_update
,
summary_verbosity
=
0
,
local_parameter_device
=
'GPU'
,
staged_vars
=
False
,
optimizer
=
'momentum'
,
# TODO(b/80125832): Enable nccl in tests
# all_reduce_spec='nccl',
all_reduce_spec
=
''
,
use_fp16
=
False
,
fp16_vars
=
False
,
fp16_enable_auto_loss_scale
=
False
,
fp16_inc_loss_scale_every_n
=
10
):
if
not
self
.
get_model_name
():
return
_check_has_gpu
()
params
=
benchmark_cnn
.
make_params
(
model
=
self
.
get_model_name
(),
num_batches
=
1
,
num_intra_threads
=
1
,
num_inter_threads
=
12
,
distortions
=
False
,
variable_update
=
variable_update
,
local_parameter_device
=
local_parameter_device
,
num_gpus
=
2
,
summary_verbosity
=
summary_verbosity
,
staged_vars
=
staged_vars
,
optimizer
=
optimizer
,
all_reduce_spec
=
all_reduce_spec
,
compact_gradient_transfer
=
False
if
all_reduce_spec
==
'nccl'
else
True
,
use_fp16
=
use_fp16
,
fp16_loss_scale
=
2.
,
fp16_vars
=
fp16_vars
,
fp16_enable_auto_loss_scale
=
fp16_enable_auto_loss_scale
,
fp16_inc_loss_scale_every_n
=
fp16_inc_loss_scale_every_n
,
)
# Test building models using multiple GPUs, but don't
# run them.
with
self
.
test_session
(
graph
=
tf
.
Graph
()):
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
_build_model
()
# Rough validation of variable type and placement, depending on mode.
all_vars
=
tf
.
global_variables
()
+
tf
.
local_variables
()
if
params
.
variable_update
==
'parameter_server'
:
for
v
in
all_vars
:
tf
.
logging
.
debug
(
'var: %s'
%
v
.
name
)
match
=
re
.
match
(
r
'tower_(\d+)/v/gpu_cached_inputs:0'
,
v
.
name
)
if
match
:
self
.
assertEqual
(
v
.
device
,
'/device:GPU:%s'
%
match
.
group
(
1
))
elif
v
.
name
.
startswith
(
'v/'
):
self
.
assertEqual
(
v
.
device
,
'/device:%s:0'
%
local_parameter_device
)
self
.
_assert_correct_var_type
(
v
,
params
)
elif
v
.
name
in
(
'input_processing/images:0'
,
'input_processing/labels:0'
,
'init_learning_rate:0'
,
'global_step:0'
,
'loss_scale:0'
,
'loss_scale_normal_steps:0'
):
self
.
assertEqual
(
v
.
device
,
'/device:CPU:0'
)
else
:
raise
ValueError
(
'Unexpected variable %s'
%
v
.
name
)
else
:
v0_count
=
0
v1_count
=
0
for
v
in
all_vars
:
if
v
.
name
.
startswith
(
'tower_0/v0/'
):
self
.
assertEqual
(
v
.
name
,
'tower_0/v0/gpu_cached_inputs:0'
)
self
.
assertEqual
(
v
.
device
,
'/device:GPU:0'
)
elif
v
.
name
.
startswith
(
'tower_1/v1/'
):
self
.
assertEqual
(
v
.
name
,
'tower_1/v1/gpu_cached_inputs:0'
)
self
.
assertEqual
(
v
.
device
,
'/device:GPU:1'
)
elif
v
.
name
.
startswith
(
'v0/'
):
v0_count
+=
1
self
.
assertEqual
(
v
.
device
,
'/device:GPU:0'
)
self
.
_assert_correct_var_type
(
v
,
params
)
elif
v
.
name
.
startswith
(
'v1/'
):
v1_count
+=
1
self
.
assertEqual
(
v
.
device
,
'/device:GPU:1'
)
self
.
_assert_correct_var_type
(
v
,
params
)
elif
v
.
name
in
(
'input_processing/images:0'
,
'input_processing/labels:0'
,
'init_learning_rate:0'
,
'global_step:0'
,
'loss_scale:0'
,
'loss_scale_normal_steps:0'
):
self
.
assertEqual
(
v
.
device
,
'/device:CPU:0'
)
else
:
raise
ValueError
(
'Unexpected variable %s'
%
v
.
name
)
self
.
assertEqual
(
v0_count
,
v1_count
)
# Validate summary ops in the model depending on verbosity level
summary_ops
=
tf
.
get_collection
(
tf
.
GraphKeys
.
SUMMARIES
)
num_summary_ops
=
len
(
summary_ops
)
self
.
assertEqual
(
num_summary_ops
>
0
,
summary_verbosity
>
0
)
if
summary_verbosity
>
0
:
has_affine_histogram
=
False
has_gradient_histogram
=
False
has_log_gradients_histogram
=
False
for
op
in
summary_ops
:
if
'/gradients'
in
op
.
name
:
has_gradient_histogram
=
True
elif
'/affine'
in
op
.
name
:
has_affine_histogram
=
True
elif
'log_gradients'
in
op
.
name
:
has_log_gradients_histogram
=
True
self
.
assertEqual
(
summary_verbosity
>=
3
,
has_affine_histogram
)
self
.
assertEqual
(
summary_verbosity
>=
3
,
has_gradient_histogram
)
self
.
assertEqual
(
summary_verbosity
>=
2
,
has_log_gradients_histogram
)
if
summary_verbosity
==
1
:
self
.
assertLess
(
num_summary_ops
,
10
)
class
TrivialModelTest
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'trivial'
class
TestVgg1Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'vgg11'
class
TestVgg19Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'vgg19'
class
TestLenet5Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'lenet'
class
TestGooglenetModel
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'googlenet'
class
TestOverfeatModel
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'overfeat'
class
TestAlexnetModel
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'alexnet'
def
extended_tests
(
self
):
return
True
class
TestTrivialModel
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'trivial'
class
TestInceptionv3Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'inception3'
def
extended_tests
(
self
):
return
True
class
TestInceptionv4Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'inception4'
class
TestResnet50Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet50'
def
model_save_load_test
(
self
):
return
True
class
TestResnet101Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet101'
class
TestResnet152Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet152'
class
TestResnet50V2Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet50_v2'
class
TestResnet101V2Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet101_v2'
class
TestResnet152V2Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet152_v2'
class
TfCnnBenchmarksTest
(
tf
.
test
.
TestCase
):
"""Tests that benchmark_cnn runs correctly."""
def
setUp
(
self
):
super
(
TfCnnBenchmarksTest
,
self
).
setUp
()
_check_has_gpu
()
benchmark_cnn
.
setup
(
benchmark_cnn
.
make_params
())
def
_run_benchmark_cnn
(
self
,
params
):
logs
=
[]
benchmark_cnn
.
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)
benchmark_cnn
.
BenchmarkCNN
(
params
).
run
()
return
logs
def
_run_benchmark_cnn_with_fake_images
(
self
,
params
,
images
,
labels
):
logs
=
[]
benchmark_cnn
.
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
input_preprocessor
=
preprocessing
.
TestImagePreprocessor
(
params
.
batch_size
*
params
.
num_gpus
,
[[
params
.
batch_size
,
227
,
227
,
3
],
[
params
.
batch_size
]],
params
.
num_gpus
,
bench
.
model
.
data_type
)
bench
.
dataset
.
_queue_runner_required
=
True
bench
.
input_preprocessor
.
set_fake_data
(
images
,
labels
)
bench
.
input_preprocessor
.
expected_subset
=
(
'validation'
if
params
.
eval
else
'train'
)
bench
.
run
()
return
logs
def
_run_benchmark_cnn_with_black_and_white_images
(
self
,
params
):
"""Runs BenchmarkCNN with black and white images.
A BenchmarkCNN is created and run with black and white images as input. Half
the images are black (i.e., filled with 0s) and half are white (i.e., filled
with 255s).
Args:
params: Params for BenchmarkCNN.
Returns:
A list of lines from the output of BenchmarkCNN.
"""
# TODO(reedwm): Instead of generating images here, use black and white
# tfrecords by calling test_util.create_black_and_white_images().
effective_batch_size
=
params
.
batch_size
*
params
.
num_gpus
half_batch_size
=
effective_batch_size
//
2
images
=
np
.
zeros
((
effective_batch_size
,
227
,
227
,
3
),
dtype
=
np
.
float32
)
images
[
half_batch_size
:,
:,
:,
:]
=
255
labels
=
np
.
array
([
0
]
*
half_batch_size
+
[
1
]
*
half_batch_size
,
dtype
=
np
.
int32
)
return
self
.
_run_benchmark_cnn_with_fake_images
(
params
,
images
,
labels
)
def
_train_and_eval_local
(
self
,
params
,
check_output_values
=
False
,
max_final_loss
=
10.
,
skip
=
None
,
use_test_preprocessor
=
True
):
# TODO(reedwm): check_output_values should default to True and be enabled
# on every test. Currently, if check_output_values=True and the calls to
# tf.set_random_seed(...) and np.seed(...) are passed certain seed values in
# benchmark_cnn.py, then most tests will fail. This indicates the tests
# are brittle and could fail with small changes when
# check_output_values=True, so check_output_values defaults to False for
# now.
def
run_fn
(
run_type
,
inner_params
):
del
run_type
if
use_test_preprocessor
:
return
[
self
.
_run_benchmark_cnn_with_black_and_white_images
(
inner_params
)
]
else
:
return
[
self
.
_run_benchmark_cnn
(
inner_params
)]
return
test_util
.
train_and_eval
(
self
,
run_fn
,
params
,
check_output_values
=
check_output_values
,
max_final_loss
=
max_final_loss
,
skip
=
skip
)
def
testAlexnet
(
self
):
params
=
test_util
.
get_params
(
'testAlexnet'
).
_replace
(
num_batches
=
30
,
init_learning_rate
=
0.01
,
model
=
'alexnet'
)
self
.
_train_and_eval_local
(
params
)
def
testNoPrintAccuracy
(
self
):
params
=
test_util
.
get_params
(
'testNoPrintAccuracy'
).
_replace
(
print_training_accuracy
=
False
)
self
.
_train_and_eval_local
(
params
)
def
testLowAccuracy
(
self
):
params
=
test_util
.
get_params
(
'testLowAccuracy'
).
_replace
(
print_training_accuracy
=
True
,
batch_size
=
5
,
num_batches
=
10
)
# We force low accuracy by having each batch containing 10 identical images,
# each with a different label. This guarantees a top-1 accuracy of exactly
# 0.1 and a top-5 accuracy of exactly 0.5.
images
=
np
.
zeros
((
10
,
227
,
227
,
3
),
dtype
=
np
.
float32
)
labels
=
np
.
arange
(
10
,
dtype
=
np
.
int32
)
logs
=
self
.
_run_benchmark_cnn_with_fake_images
(
params
,
images
,
labels
)
training_outputs
=
test_util
.
get_training_outputs_from_logs
(
logs
,
params
.
print_training_accuracy
)
last_output
=
training_outputs
[
-
1
]
# TODO(reedwm): These should be assertEqual but for some reason,
# occasionally the accuracies are lower (Running this test 500 times, these
# asserts failed twice). Investigate this problem.
self
.
assertLessEqual
(
last_output
.
top_1_accuracy
,
0.1
)
self
.
assertLessEqual
(
last_output
.
top_5_accuracy
,
0.5
)
def
testParameterServer
(
self
):
params
=
test_util
.
get_params
(
'testParameterServer'
)
self
.
_train_and_eval_local
(
params
)
def
testParameterServerStaged
(
self
):
params
=
test_util
.
get_params
(
'testParameterServerStaged'
).
_replace
(
staged_vars
=
True
)
self
.
_train_and_eval_local
(
params
)
def
testReplicated
(
self
):
params
=
test_util
.
get_params
(
'testReplicated'
).
_replace
(
variable_update
=
'replicated'
)
self
.
_train_and_eval_local
(
params
)
def
testIndependent
(
self
):
params
=
test_util
.
get_params
(
'testIndependent'
).
_replace
(
variable_update
=
'independent'
)
self
.
_train_and_eval_local
(
params
)
def
testForwardOnly
(
self
):
params
=
test_util
.
get_params
(
'testForwardOnly'
).
_replace
(
forward_only
=
True
)
# Evaluation is not supported with --forward_only, so we set skip='eval'.
self
.
_train_and_eval_local
(
params
,
skip
=
'eval'
)
def
testForwardOnlyAndFreeze
(
self
):
params
=
test_util
.
get_params
(
'testForwardOnlyAndFreeze'
).
_replace
(
forward_only
=
True
,
freeze_when_forward_only
=
True
,
train_dir
=
None
)
# Training is not supported with --freeze_when_forward_only.
self
.
_train_and_eval_local
(
params
,
skip
=
'eval_and_train_from_checkpoint'
)
def
testNoDistortions
(
self
):
params
=
test_util
.
get_params
(
'testNoDistortions'
).
_replace
(
distortions
=
False
)
self
.
_train_and_eval_local
(
params
)
def
testCpuAsLocalParamDevice
(
self
):
params
=
test_util
.
get_params
(
'testCpuAsLocalParamDevice'
).
_replace
(
local_parameter_device
=
'cpu'
)
self
.
_train_and_eval_local
(
params
)
def
testNHWC
(
self
):
params
=
test_util
.
get_params
(
'testNHWC'
).
_replace
(
data_format
=
'NHWC'
)
self
.
_train_and_eval_local
(
params
)
def
testCpuAsDevice
(
self
):
params
=
test_util
.
get_params
(
'testCpuAsDevice'
).
_replace
(
device
=
'cpu'
,
data_format
=
'NHWC'
)
# NHWC required when --device=cpu
self
.
_train_and_eval_local
(
params
)
def
testMomentumParameterServer
(
self
):
params
=
test_util
.
get_params
(
'testMomentumParameterServer'
).
_replace
(
optimizer
=
'momentum'
,
momentum
=
0.8
)
self
.
_train_and_eval_local
(
params
)
def
testRmspropReplicated
(
self
):
params
=
test_util
.
get_params
(
'testRmspropReplicated'
).
_replace
(
variable_update
=
'replicated'
,
optimizer
=
'rmsprop'
,
rmsprop_decay
=
0.8
,
rmsprop_momentum
=
0.6
,
rmsprop_epsilon
=
0.7
,
init_learning_rate
=
0.01
)
self
.
_train_and_eval_local
(
params
)
def
testBatchGroupSize
(
self
):
params
=
test_util
.
get_params
(
'testBatchGroupSize'
).
_replace
(
batch_group_size
=
4
,
num_batches
=
100
,
num_warmup_batches
=
5
)
self
.
_train_and_eval_local
(
params
)
def
testGradientClip
(
self
):
params
=
test_util
.
get_params
(
'testGradientClip'
).
_replace
(
gradient_clip
=
100.0
)
self
.
_train_and_eval_local
(
params
)
def
testWeightDecay
(
self
):
params
=
test_util
.
get_params
(
'testWeightDecay'
).
_replace
(
weight_decay
=
0.0001
)
self
.
_train_and_eval_local
(
params
)
def
testNoLayers
(
self
):
params
=
test_util
.
get_params
(
'testNoLayers'
).
_replace
(
use_tf_layers
=
False
)
self
.
_train_and_eval_local
(
params
)
def
testSaveModelSteps
(
self
):
params
=
test_util
.
get_params
(
'testSaveModelSteps'
).
_replace
(
save_model_steps
=
2
,
num_warmup_batches
=
0
,
num_batches
=
10
,
max_ckpts_to_keep
=
3
)
self
.
_train_and_eval_local
(
params
)
for
i
in
range
(
1
,
20
+
1
):
# We train for 20 steps, since self._train_and_eval_local() does two
# training runs of 10 steps each. We save a checkpoint every 2 steps and
# keep the last 3 checkpoints, so at the end, we should have checkpoints
# for steps 16, 18, and 20.
matches
=
glob
.
glob
(
os
.
path
.
join
(
params
.
train_dir
,
'model.ckpt-{}.*'
.
format
(
i
)))
if
i
in
(
16
,
18
,
20
):
self
.
assertTrue
(
matches
)
else
:
self
.
assertFalse
(
matches
)
def
testFp16WithFp32Vars
(
self
):
params
=
test_util
.
get_params
(
'testFp16WithFp32Vars'
).
_replace
(
use_fp16
=
True
,
fp16_vars
=
False
,
fp16_loss_scale
=
1.
)
self
.
_train_and_eval_local
(
params
)
def
testFp16WithFp16Vars
(
self
):
params
=
test_util
.
get_params
(
'testFp16WithFp16Vars'
).
_replace
(
use_fp16
=
True
,
fp16_vars
=
True
)
self
.
_train_and_eval_local
(
params
)
def
testXlaCompile
(
self
):
params
=
test_util
.
get_params
(
'testXlaCompile'
).
_replace
(
xla_compile
=
True
)
self
.
_train_and_eval_local
(
params
)
@
unittest
.
skip
(
'Fails for unknown reason'
)
def
testXlaCompileWithFp16
(
self
):
params
=
test_util
.
get_params
(
'testXlaCompileWithFp16'
).
_replace
(
use_fp16
=
True
,
xla_compile
=
True
)
self
.
_train_and_eval_local
(
params
)
def
testGradientRepacking
(
self
):
params
=
test_util
.
get_params
(
'testGradientRepacking1'
).
_replace
(
gradient_repacking
=
2
)
self
.
_train_and_eval_local
(
params
,
skip
=
'eval_and_train_from_checkpoint'
)
params
=
test_util
.
get_params
(
'testGradientRepacking2'
).
_replace
(
gradient_repacking
=
2
,
use_fp16
=
True
)
self
.
_train_and_eval_local
(
params
,
skip
=
'eval_and_train_from_checkpoint'
)
def
testTraceFileChromeTraceFormat
(
self
):
trace_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'testTraceFileChromeTraceFormat_tracefile'
)
params
=
test_util
.
get_params
(
'testTraceFileChromeTraceFormat'
).
_replace
(
trace_file
=
trace_file
,
use_chrome_trace_format
=
True
)
self
.
_train_and_eval_local
(
params
)
self
.
assertGreater
(
os
.
stat
(
trace_file
).
st_size
,
0
)
def
testTraceFileStepStatsProto
(
self
):
trace_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'testTraceFileStepStatsProto_tracefile'
)
params
=
test_util
.
get_params
(
'testTraceFileStepStatsProto'
).
_replace
(
trace_file
=
trace_file
,
use_chrome_trace_format
=
False
)
self
.
_train_and_eval_local
(
params
)
self
.
assertGreater
(
os
.
stat
(
trace_file
).
st_size
,
0
)
with
open
(
trace_file
)
as
f
:
step_stats
=
step_stats_pb2
.
StepStats
()
# The following statement should not raise an exception.
contents
=
f
.
read
()
text_format
.
Merge
(
contents
,
step_stats
)
def
testTfprofFile
(
self
):
tfprof_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'testTfprofFile_tfproffile'
)
params
=
test_util
.
get_params
(
'testTfprofFile'
).
_replace
(
tfprof_file
=
tfprof_file
)
self
.
_train_and_eval_local
(
params
,
skip
=
'eval_and_train_from_checkpoint'
)
self
.
assertGreater
(
os
.
stat
(
tfprof_file
).
st_size
,
0
)
with
open
(
tfprof_file
,
'rb'
)
as
f
:
profile_proto
=
tfprof_log_pb2
.
ProfileProto
()
# The following statement should not raise an exception.
profile_proto
.
ParseFromString
(
f
.
read
())
@
unittest
.
skip
(
'Fails for unknown reason'
)
def
testMoveTrainDir
(
self
):
params
=
test_util
.
get_params
(
'testMoveTrainDir'
)
self
.
_train_and_eval_local
(
params
)
new_train_dir
=
params
.
train_dir
+
'_moved'
os
.
rename
(
params
.
train_dir
,
new_train_dir
)
params
=
params
.
_replace
(
train_dir
=
new_train_dir
,
eval
=
True
)
self
.
_run_benchmark_cnn_with_black_and_white_images
(
params
)
@
mock
.
patch
(
'tensorflow.compat.v1.train.Saver'
)
@
mock
.
patch
(
'benchmark_cnn._get_checkpoint_to_load'
)
def
testLoadCheckpoint
(
self
,
mock_checkpoint_to_load
,
mock_saver
):
"""Tests load checkpoint with full path to checkpoint."""
expected_checkpoint
=
'/path/to/checkpoints/model.ckpt-1243'
mock_checkpoint_to_load
.
return_value
=
expected_checkpoint
global_batch
=
benchmark_cnn
.
load_checkpoint
(
mock_saver
,
None
,
expected_checkpoint
)
self
.
assertEqual
(
global_batch
,
1243
)
def
testGetCheckpointToLoadFullPath
(
self
):
"""Tests passing full path."""
ckpt_path
=
'/foo/bar/model.ckpt-189'
full_path
=
benchmark_cnn
.
_get_checkpoint_to_load
(
ckpt_path
)
self
.
assertEqual
(
full_path
,
ckpt_path
)
def
testGetCheckpointToLoadException
(
self
):
"""Tests exception for directory without a checkpoint."""
ckpt_path
=
'/foo/bar/checkpoints'
self
.
assertRaises
(
benchmark_cnn
.
CheckpointNotFoundException
,
benchmark_cnn
.
_get_checkpoint_to_load
,
ckpt_path
)
@
mock
.
patch
(
'tensorflow.compat.v1.train.get_checkpoint_state'
)
def
testGetCheckpointToLoad
(
self
,
mock_checkpoint_state
):
"""Tests passing path to checkpoint folder."""
expected_checkpoint
=
'/path/to/checkpoints/model.ckpt-1243'
mock_checkpoint_state
.
return_value
=
mock
.
Mock
(
model_checkpoint_path
=
expected_checkpoint
)
ckpt_path
=
'/path/to/checkpoints/'
full_path
=
benchmark_cnn
.
_get_checkpoint_to_load
(
ckpt_path
)
self
.
assertEqual
(
full_path
,
expected_checkpoint
)
def
testImagenetPreprocessor
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessor'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testImagenetPreprocessorNoDistortions
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessorNoDistortions'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
,
distortions
=
False
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testImagenetPreprocessorVerboseSummary
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessorVerboseSummary'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
,
distortions
=
False
,
summary_verbosity
=
2
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testCifar10SyntheticData
(
self
):
params
=
test_util
.
get_params
(
'testCifar10SyntheticData'
).
_replace
(
data_name
=
'cifar10'
)
self
.
_train_and_eval_local
(
params
)
def
testShiftRatio
(
self
):
test_util
.
monkey_patch_base_cluster_manager
()
params
=
benchmark_cnn
.
make_params
(
data_name
=
'imagenet'
,
data_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
),
job_name
=
'worker'
,
worker_hosts
=
'w1,w2,w3,w4'
,
ps_hosts
=
'p1'
,
task_index
=
0
)
self
.
assertEqual
(
benchmark_cnn
.
BenchmarkCNN
(
params
).
input_preprocessor
.
shift_ratio
,
0.0
)
params
=
params
.
_replace
(
task_index
=
3
)
self
.
assertEqual
(
benchmark_cnn
.
BenchmarkCNN
(
params
).
input_preprocessor
.
shift_ratio
,
0.75
)
  def testDistributedReplicatedSavableVars(self):
    test_util.monkey_patch_base_cluster_manager()
    params = benchmark_cnn.make_params(
        variable_update='distributed_replicated',
        model='inception4',
        data_name='imagenet',
        data_dir=os.path.join(platforms_util.get_test_data_dir(),
                              'fake_tf_record_data'),
        job_name='worker',
        worker_hosts='w1,w2,w3,w4',
        ps_hosts='p1',
        datasets_use_prefetch=False)
    bench = benchmark_cnn.BenchmarkCNN(params)
    with tf.Graph().as_default():
      bench._build_model()
      savable_vars = bench.variable_mgr.savable_variables()
      # Assert all global variables are in savable_vars
      for v in tf.global_variables():
        if not v.name.startswith(
            variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/v0'):
          self.assertEqual(v.name, 'global_step:0')
        name = bench.variable_mgr._strip_port(v.name)
        if name.startswith(variable_mgr_util.PS_SHADOW_VAR_PREFIX):
          name = name[len(variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/'):]
        self.assertIn(name, savable_vars)
        self.assertIn(savable_vars[name], tf.global_variables())
      # Assert all local variables on the first tower are in savable_vars
      for v in tf.local_variables():
        if v.name.startswith('v0/'):
          name = bench.variable_mgr._strip_port(v.name)
          self.assertIn(name, savable_vars)

  def _test_preprocessing_eval(self, image_height, image_width, output_height,
                               output_width):
    image = tf.fill((image_height, image_width, 3),
                    tf.constant(128, dtype=tf.uint8))
    params = benchmark_cnn.make_params()
    new_image = preprocessing.eval_image(image, output_height, output_width, 0,
                                         'bilinear', params.summary_verbosity)
    with self.test_session() as sess:
      new_image_value = sess.run(new_image)
    self.assertAllEqual(new_image_value,
                        np.full((output_height, output_width, 3), 128,
                                dtype=np.uint8))

  def testPreprocessingEval(self):
    self._test_preprocessing_eval(10, 10, 4, 4)
    self._test_preprocessing_eval(4, 4, 10, 10)
    self._test_preprocessing_eval(1, 100, 100, 1)
    self._test_preprocessing_eval(100, 1, 1, 100)
    self._test_preprocessing_eval(1, 100, 1, 100)

  def _test_preprocessing_traing(self, image_buf, image_color, output_height,
                                 output_width, bbox, batch_position,
                                 resize_method, distortions, summary_verbosity,
                                 fuse_decode_and_crop):
    new_image = preprocessing.train_image(
        image_buf,
        output_height,
        output_width,
        bbox,
        batch_position,
        resize_method,
        distortions,
        summary_verbosity=summary_verbosity,
        fuse_decode_and_crop=fuse_decode_and_crop)
    self.assertEqual(new_image.shape, [output_height, output_width, 3])
    with self.test_session(use_gpu=True) as sess:
      new_image_value = sess.run(new_image)
    self.assertAllClose(
        new_image_value,
        np.full([output_height, output_width, 3], image_color,
                dtype=np.float32),
        atol=50., rtol=0.)

  def testPreprocessingTrain(self):
    test_data_dir = os.path.join(platforms_util.get_test_data_dir(), 'images')
    black_file = os.path.join(test_data_dir, 'black_image.jpg')
    with open(black_file, 'rb') as f:
      black_jpg_buffer = f.read()
    white_file = os.path.join(test_data_dir, 'white_image.jpg')
    with open(white_file, 'rb') as f:
      white_jpg_buffer = f.read()
    bbox = tf.zeros((1, 0, 4), dtype=tf.float32)
    batch_position = 0
    # Each size config is (output_height, output_width, resize_method)
    size_configs = [(100, 100, 'round_robin'), (150, 10, 'bilinear'),
                    (10, 150, 'nearest')]
    # Each image config is (image_buf, image_color)
    image_configs = [(white_jpg_buffer, 255), (black_jpg_buffer, 0)]
    for (image_buf, image_color) in image_configs:
      for output_height, output_width, resize_method in size_configs:
        for distortions in [True, False]:
          for summary_verbosity in [0, 2]:
            for fuse_decode_and_crop in [True, False]:
              self._test_preprocessing_traing(
                  image_buf, image_color, output_height, output_width, bbox,
                  batch_position, resize_method, distortions,
                  summary_verbosity, fuse_decode_and_crop)

  def _test_learning_rate(self, params, global_step_to_expected_learning_rate):
    self.longMessage = True  # pylint: disable=invalid-name
    bench = benchmark_cnn.BenchmarkCNN(params)
    with tf.Graph().as_default() as graph:
      bench._build_model()
      global_step = graph.get_tensor_by_name('global_step:0')
      learning_rate = graph.get_tensor_by_name('learning_rate_tensor:0')
      with self.test_session(graph=graph, use_gpu=True) as sess:
        items = global_step_to_expected_learning_rate.items()
        for global_step_val, expected_learning_rate in items:
          self.assertAlmostEqual(
              sess.run(learning_rate, {global_step: global_step_val}),
              expected_learning_rate,
              msg='at global_step:{}'.format(global_step_val))

  def testLearningRateModelSpecificResNet(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       batch_size=256,
                                       variable_update='parameter_server',
                                       num_gpus=1)
    self._test_learning_rate(params, {
        0: 0,
        150136: 0.128,
        150137: 0.0128,
        300273: 0.0128,
        300274: 0.00128,
        10000000: 0.0000128
    })

  def testLearningRateUserProvidedInitLr(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       batch_size=256,
                                       variable_update='replicated',
                                       init_learning_rate=1.)
    self._test_learning_rate(params, {
        0: 1.,
        10000000: 1.
    })

  def testLearningRateUserProvidedInitLrAndWarmup(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       batch_size=256,
                                       variable_update='replicated',
                                       init_learning_rate=1.,
                                       num_learning_rate_warmup_epochs=5)
    self._test_learning_rate(params, {
        0: 0.,
        12511: 0.5,
        25022: 1.,
        10000000: 1.
    })

  def testLearningRateUserProvidedDecayInfo(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       init_learning_rate=1.,
                                       learning_rate_decay_factor=0.5,
                                       num_epochs_per_decay=2,
                                       minimum_learning_rate=0.3750,
                                       batch_size=32)
    self._test_learning_rate(params, {
        0: 1.,
        80071: 1.,
        80072: 0.5,
        160143: 0.5,
        160144: 0.375,
        10000000: 0.375
    })
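  # Illustrative note (not part of the original test): the step boundaries
  # above are consistent with a staircase decay whose period is two ImageNet
  # epochs measured in steps, e.g. int(2 * 1281167 / 32) == 80072, so the
  # first decayed step is 80072 and after a second decay the rate is clamped
  # at minimum_learning_rate=0.375.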
  def testLearningRateUserProvidedZeroDecay(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       num_learning_rate_warmup_epochs=0,
                                       learning_rate_decay_factor=0.5,
                                       num_epochs_per_decay=0,
                                       minimum_learning_rate=0.3750,
                                       batch_size=32)
    with self.assertRaises(ValueError):
      with tf.Graph().as_default():
        # This will fail because params.learning_rate_decay_factor cannot be
        # nonzero if params.num_epochs_per_decay is zero.
        benchmark_cnn.BenchmarkCNN(params)._build_model()

  def testLearningRateUserProvidedSchedule(self):
    params = benchmark_cnn.make_params(
        model='trivial',
        batch_size=32,
        piecewise_learning_rate_schedule='1;3;.1;5;.01')
    self._test_learning_rate(params, {
        0: 1.,
        120108: 1.,
        120109: 0.1,
        200181: 0.1,
        200182: 0.01,
        100000000: 0.01
    })

  def testNumBatchesAndEpochs(self):
    params = benchmark_cnn.make_params()
    batches, epochs = benchmark_cnn.get_num_batches_and_epochs(params, 10, 100)
    self.assertEqual(batches, benchmark_cnn._DEFAULT_NUM_BATCHES)
    self.assertAlmostEqual(epochs,
                           float(benchmark_cnn._DEFAULT_NUM_BATCHES) / 10)
    params = benchmark_cnn.make_params(num_batches=21)
    batches, epochs = benchmark_cnn.get_num_batches_and_epochs(params, 25, 50)
    self.assertEqual(batches, 21)
    self.assertAlmostEqual(epochs, 10.5)
    params = benchmark_cnn.make_params(num_epochs=3)
    batches, epochs = benchmark_cnn.get_num_batches_and_epochs(params, 2, 3)
    self.assertEqual(batches, 5)
    self.assertAlmostEqual(epochs, 10. / 3.)
    params = benchmark_cnn.make_params(num_epochs=4)
    batches, epochs = benchmark_cnn.get_num_batches_and_epochs(params, 2, 3)
    self.assertEqual(batches, 6)
    self.assertAlmostEqual(epochs, 4)
    with self.assertRaises(ValueError):
      params = benchmark_cnn.make_params(num_batches=100, num_epochs=100)
      benchmark_cnn.get_num_batches_and_epochs(params, 1, 1)
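  # Illustrative note (not part of the original test): with num_epochs=3, a
  # batch size of 2 and 3 examples per epoch, the requested 3 epochs need
  # 3 * 3 / 2 = 4.5 steps, which rounds up to batches=5; the realized epoch
  # count is then 5 * 2 / 3 = 10/3, matching the assertions above.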
  def _testEvalDuringTraining(self, params, expected_num_eval_batches_found):
    # The idea of this test is that all train images are black and all eval
    # images are white. We pass the images through the TestModel, and ensure
    # the outputs are as expected.
    batch_size = params.batch_size
    eval_batch_size = params.eval_batch_size or params.batch_size

    class TestModel(test_util.TestCNNModel):

      def __init__(self):
        super(TestModel, self).__init__()
        self.depth = 3

      def add_inference(self, cnn):
        if cnn.phase_train:
          # This will allow us to test that 100 is only added during training
          # and not during eval.
          cnn.top_layer += 100
          assert cnn.top_layer.shape[0] == batch_size
        else:
          assert cnn.top_layer.shape[0] == eval_batch_size
        # Reduce the image to a single number. The number should be (-1 + 100)
        # during training and 1 during testing.
        cnn.top_layer = tf.reshape(cnn.top_layer,
                                   (cnn.top_layer.shape[0], -1))
        cnn.top_layer = tf.reduce_mean(cnn.top_layer, axis=1)
        cnn.top_layer = tf.reshape(cnn.top_layer,
                                   (cnn.top_layer.shape[0], 1, 1, 1))
        cnn.top_size = 1
        trainable_vars = tf.trainable_variables()
        # The super method will compute image*A*B, where A=1 and B=2.
        super(TestModel, self).add_inference(cnn)
        if not cnn.phase_train:
          # Assert no new variables were added, since they should be reused
          # from training.
          assert len(trainable_vars) == len(tf.trainable_variables())

    model = TestModel()
    dataset = datasets.ImagenetDataset(params.data_dir)
    logs = []
    bench_cnn = benchmark_cnn.BenchmarkCNN(params, model=model,
                                           dataset=dataset)
    with test_util.monkey_patch(benchmark_cnn,
                                log_fn=test_util.print_and_add_to_list(logs)):
      bench_cnn.run()
    training_outputs = test_util.get_training_outputs_from_logs(
        logs, print_training_accuracy=False)
    self.assertEqual(len(training_outputs), params.num_batches)
    expected_training_output = (-1 + 100) * 1 * 2
    for training_output in training_outputs:
      self.assertEqual(training_output.loss, expected_training_output)
    eval_outputs = test_util.get_evaluation_outputs_from_logs(logs)
    self.assertTrue(eval_outputs)
    expected_eval_output = 1 * 1 * 2
    for eval_output in eval_outputs:
      self.assertEqual(eval_output.top_1_accuracy, expected_eval_output)
      self.assertEqual(eval_output.top_5_accuracy, expected_eval_output)
    num_eval_batches_found = 0
    eval_batch_regex = re.compile(r'^\d+\t[0-9.]+ examples/sec$')
    for log in logs:
      if eval_batch_regex.match(log):
        num_eval_batches_found += 1
    self.assertEqual(num_eval_batches_found, expected_num_eval_batches_found)

  def testEvalDuringTraining(self):
    data_dir = test_util.create_black_and_white_images()
    base_params = test_util.get_params('testEvalDuringTraining')
    train_dir = base_params.train_dir
    base_params = base_params._replace(
        train_dir=None, print_training_accuracy=False, num_warmup_batches=0,
        num_batches=7, num_eval_batches=2, display_every=1,
        init_learning_rate=0, weight_decay=0, distortions=False,
        data_dir=data_dir)
    expected_num_eval_batches_found = (
        base_params.num_eval_batches * (base_params.num_batches // 2 + 1))

    # Test --eval_during_training_every_n_steps
    self._testEvalDuringTraining(
        base_params._replace(eval_during_training_every_n_steps=2,
                             variable_update='parameter_server'),
        expected_num_eval_batches_found)
    self._testEvalDuringTraining(
        base_params._replace(eval_during_training_every_n_steps=2,
                             variable_update='replicated'),
        expected_num_eval_batches_found)
    self._testEvalDuringTraining(
        base_params._replace(eval_during_training_every_n_steps=2,
                             variable_update='replicated',
                             summary_verbosity=2,
                             save_summaries_steps=2,
                             datasets_use_prefetch=False),
        expected_num_eval_batches_found)
    self._testEvalDuringTraining(
        base_params._replace(eval_during_training_every_n_steps=2,
                             variable_update='replicated',
                             use_fp16=True,
                             train_dir=train_dir,
                             eval_batch_size=base_params.batch_size + 2),
        expected_num_eval_batches_found)

    # Test --eval_during_training_every_n_epochs
    every_n_epochs = (2 * base_params.batch_size * base_params.num_gpus /
                      datasets.IMAGENET_NUM_TRAIN_IMAGES)
    self._testEvalDuringTraining(
        base_params._replace(
            eval_during_training_every_n_epochs=every_n_epochs,
            variable_update='replicated'),
        expected_num_eval_batches_found)

    # Test --eval_during_training_at_specified_steps
    list_steps = [2, 3, 5, 7, 1000]
    num_eval_steps = 1 + sum(1 for step in list_steps
                             if step < base_params.num_batches)
    expected_num_eval_batches_found = (
        base_params.num_eval_batches * num_eval_steps)
    self._testEvalDuringTraining(
        base_params._replace(
            eval_during_training_at_specified_steps=list_steps,
            variable_update='replicated'),
        expected_num_eval_batches_found)

    # Test --eval_during_training_at_specified_epochs
    list_epochs = [(step * base_params.batch_size * base_params.num_gpus /
                    datasets.IMAGENET_NUM_TRAIN_IMAGES)
                   for step in list_steps]
    self._testEvalDuringTraining(
        base_params._replace(
            eval_during_training_at_specified_epochs=list_epochs,
            variable_update='replicated'),
        expected_num_eval_batches_found)

    # Test --eval_during_training_every_n_steps runs with synthetic data.
    params = base_params._replace(variable_update='replicated', data_dir=None,
                                  eval_during_training_every_n_steps=2,
                                  num_batches=2)
    benchmark_cnn.BenchmarkCNN(params).run()
  def testEvalDuringTrainingNumEpochs(self):
    params = benchmark_cnn.make_params(
        batch_size=1, eval_batch_size=2, eval_during_training_every_n_steps=1,
        num_batches=30, num_eval_epochs=100 / datasets.IMAGENET_NUM_VAL_IMAGES)
    bench_cnn = benchmark_cnn.BenchmarkCNN(params)
    self.assertEqual(bench_cnn.num_batches, 30)
    self.assertAlmostEqual(bench_cnn.num_epochs,
                           30 / datasets.IMAGENET_NUM_TRAIN_IMAGES)
    self.assertAlmostEqual(bench_cnn.num_eval_batches, 50)
    self.assertAlmostEqual(bench_cnn.num_eval_epochs,
                           100 / datasets.IMAGENET_NUM_VAL_IMAGES)

  def testEarlyStopping(self):
    params = benchmark_cnn.make_params(
        batch_size=2,
        display_every=1,
        num_batches=100,
        eval_during_training_every_n_steps=2,
        stop_at_top_1_accuracy=0.4,
    )
    with mock.patch.object(benchmark_cnn.BenchmarkCNN, '_eval_once',
                           side_effect=[(0.1, 0.1), (0.5, 0.5),
                                        (0.2, 0.2)]) as mock_eval_once:
      logs = []
      bench_cnn = benchmark_cnn.BenchmarkCNN(params)
      with test_util.monkey_patch(
          benchmark_cnn, log_fn=test_util.print_and_add_to_list(logs)):
        bench_cnn.run()
      training_outputs = test_util.get_training_outputs_from_logs(
          logs, print_training_accuracy=False)
      # We should stop after the second evaluation, and we evaluate every 2
      # steps. So there should be 2 * 2 = 4 training outputs.
      self.assertEqual(len(training_outputs), 4)
      self.assertEqual(mock_eval_once.call_count, 2)

  def testOutOfRangeErrorsAreNotIgnored(self):
    error_msg = 'Fake OutOfRangeError error message'
    with mock.patch.object(benchmark_cnn.BenchmarkCNN,
                           'benchmark_with_session',
                           side_effect=tf.errors.OutOfRangeError(
                               None, None, error_msg)):
      with self.assertRaisesRegex(RuntimeError, error_msg):
        benchmark_cnn.BenchmarkCNN(benchmark_cnn.make_params()).run()

  def testInvalidFlags(self):
    params = benchmark_cnn.make_params(device='cpu', data_format='NCHW')
    with self.assertRaises(ValueError):
      benchmark_cnn.BenchmarkCNN(params)

    params = benchmark_cnn.make_params(use_fp16=True, fp16_vars=True,
                                       variable_update='replicated',
                                       all_reduce_spec='nccl')
    with self.assertRaises(ValueError):
      benchmark_cnn.BenchmarkCNN(params)

    # Automatic loss scaling is only supported for 'replicated', 'ps',
    # and 'independent' variable_updates.
    invalid_variable_updates = ['distributed_replicated',
                                'distributed_all_reduce']
    for variable_update in invalid_variable_updates:
      params = benchmark_cnn.make_params(use_fp16=True, fp16_vars=True,
                                         fp16_enable_auto_loss_scale=True,
                                         variable_update=variable_update)
      with self.assertRaises(ValueError):
        benchmark_cnn.BenchmarkCNN(params)

    # Automatic loss scaling is not supported for 'nccl'.
    params = benchmark_cnn.make_params(use_fp16=True, fp16_vars=True,
                                       fp16_enable_auto_loss_scale=True,
                                       all_reduce_spec='nccl')
    with self.assertRaises(ValueError):
      benchmark_cnn.BenchmarkCNN(params)

    # Automatic loss scaling is not supported for 'staged_vars'.
    params = benchmark_cnn.make_params(use_fp16=True, fp16_vars=True,
                                       fp16_enable_auto_loss_scale=True,
                                       staged_vars=True)
    with self.assertRaises(ValueError):
      benchmark_cnn.BenchmarkCNN(params)

  def testMakeParams(self):
    default_params = benchmark_cnn.make_params()
    self.assertEqual(default_params.model,
                     flags.param_specs['model'].default_value)
    params = benchmark_cnn.make_params(model='foo')
    self.assertEqual(params.model, 'foo')
    with self.assertRaises(ValueError):
      benchmark_cnn.make_params(job_name='foo')
    with self.assertRaises(ValueError):
      benchmark_cnn.make_params(gpu_memory_frac_for_testing=-1.)
class VariableUpdateTest(tf.test.TestCase):
  """Tests that variables are updated correctly.

  These tests use a very simple deterministic model. For example, some tests
  use the model

    loss = image * A * B

  where image is a 1x1 image (with a single scalar value), and A and B are
  scalar variables. Tests will run tf_cnn_benchmarks with such a model, on a
  sequence of scalar images, and assert that the losses are the correct value.
  Since the losses depend on the variables, this indirectly tests variables
  are updated correctly.
  """

  def setUp(self):
    super(VariableUpdateTest, self).setUp()
    _check_has_gpu()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  def _get_benchmark_cnn_losses(self, inputs, params):
    """Returns the losses of BenchmarkCNN on the given inputs and params."""
    logs = []
    model = test_util.TestCNNModel()
    with test_util.monkey_patch(
        benchmark_cnn,
        log_fn=test_util.print_and_add_to_list(logs),
        LOSS_AND_ACCURACY_DIGITS_TO_SHOW=15):
      bench = benchmark_cnn.BenchmarkCNN(
          params, dataset=test_util.TestDataSet(), model=model)
      # The test model does not use labels when computing loss, so the label
      # values do not matter as long as it's the right shape.
      labels = np.array([1] * inputs.shape[0])
      bench.input_preprocessor.set_fake_data(inputs, labels)
      if bench.eval_input_preprocessor:
        bench.eval_input_preprocessor.set_fake_data(inputs, labels)
      bench.run()
    outputs = test_util.get_training_outputs_from_logs(
        logs, params.print_training_accuracy)
    return [x.loss for x in outputs]

  def _test_variable_update(self, params):
    """Tests variables are updated correctly when the given params are used.

    A BenchmarkCNN is created with a TestCNNModel, and is run with some scalar
    images. The losses are then compared with the losses obtained with
    TestCNNModel().manually_compute_losses()

    Args:
      params: a Params tuple used to create BenchmarkCNN.
    """
    inputs = test_util.get_fake_var_update_inputs()
    actual_losses = self._get_benchmark_cnn_losses(inputs, params)
    expected_losses, = test_util.TestCNNModel().manually_compute_losses(
        inputs, 1, params)
    rtol = 3e-2 if params.use_fp16 else 1e-5
    self.assertAllClose(actual_losses[:len(expected_losses)],
                        expected_losses, rtol=rtol, atol=0.)

  def _test_variable_updates(self, params,
                             var_updates=('parameter_server', 'replicated')):
    for var_update in var_updates:
      self._test_variable_update(params._replace(variable_update=var_update))

  def testDefault(self):
    params = test_util.get_var_update_params()
    self._test_variable_updates(params)

  # For some reason, this test doesn't always pass
  # def testCpuAsDevice(self):
  #   params = test_util.get_var_update_params()._replace(
  #       device='cpu',
  #       data_format='NHWC')  # NHWC required when --device=cpu
  #   self._test_variable_updates(params)

  def testCpuAsLocalParamDevice(self):
    params = test_util.get_var_update_params()._replace(
        local_parameter_device='cpu')
    self._test_variable_updates(params)

  def testFp16(self):
    params = test_util.get_var_update_params()._replace(use_fp16=True)
    self._test_variable_updates(params)

  def testMomentum(self):
    params = test_util.get_var_update_params()._replace(optimizer='momentum')
    self._test_variable_updates(params)

  def testRmsprop(self):
    params = test_util.get_var_update_params()._replace(optimizer='rmsprop')
    self._test_variable_updates(params)

  def testNoLayers(self):
    params = test_util.get_var_update_params()._replace(use_tf_layers=False)
    self._test_variable_updates(params)

  def testVariousAllReduceSpecs(self):
    # We do not test xring, because it requires all Variables to have at
    # least two elements.
    params = test_util.get_var_update_params()._replace(
        all_reduce_spec='pscpu')
    self._test_variable_updates(params, var_updates=('replicated',))
    params = params._replace(all_reduce_spec='psgpu')
    self._test_variable_updates(params, var_updates=('replicated',))
    # TODO(b/80125832): Enable nccl in tests
    # params = params._replace(all_reduce_spec='nccl',
    #                          compact_gradient_transfer=False)
    # self._test_variable_updates(params, var_updates=('replicated',))

  def testPrintBaseLoss(self):
    params = test_util.get_var_update_params()._replace(
        loss_type_to_report='base_loss')
    self._test_variable_updates(params)

  def testSingleL2LossOp(self):
    params = test_util.get_var_update_params()._replace(
        single_l2_loss_op=True)
    self._test_variable_updates(params)

  def testResourceVars(self):
    params = test_util.get_var_update_params()._replace(
        use_resource_vars=True)
    self._test_variable_updates(params)

  def testEvalDuringTrainingEveryNSteps(self):
    # TODO(reedwm): Test that the eval results are correct. This only tests
    # that training results are correct.
    params = test_util.get_var_update_params()._replace(
        eval_during_training_every_n_steps=1)
    self._test_variable_updates(params, var_updates=('replicated',))
class VariableMgrLocalReplicatedTest(tf.test.TestCase):

  def _test_grad_aggregation_with_var_mgr(self, variable_mgr, num_towers,
                                          num_vars, deferred_grads):
    tower_devices = ['/gpu:%d' % i for i in range(num_towers)]
    tower_grads = []
    expected_sums = [0.] * num_vars
    for i, tower_device in enumerate(tower_devices):
      with tf.device(tower_device):
        grad_vars = []
        for j in range(num_vars):
          n = num_towers * i + j
          grad_vars.append((tf.constant(n, dtype=tf.float32),
                            tf.Variable(n, dtype=tf.float32)))
          expected_sums[j] += n
      tower_grads.append(grad_vars)

    _, agg_device_grads = variable_mgr.preprocess_device_grads(tower_grads)
    expected_device_grads = []
    for i in range(num_towers):
      expected_grad_vars = []
      for j in range(num_vars):
        expected_grad_and_var = [expected_sums[j], num_towers * i + j]
        if isinstance(agg_device_grads[i][j], tuple):
          # agg_device_grads[i][j] can be a list or tuple.
          expected_grad_and_var = tuple(expected_grad_and_var)
        expected_grad_vars.append(expected_grad_and_var)
      if isinstance(agg_device_grads[i], tuple):
        # agg_device_grads[i] can be a list or tuple.
        expected_grad_vars = tuple(expected_grad_vars)
      expected_device_grads.append(expected_grad_vars)
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
      sess.run(tf.initialize_all_variables())
      sess.run(variable_mgr._warmup_ops)
      if deferred_grads:
        # With deferred grads, the result of a session run is always the
        # summed gradients from the previous session run.
        sess.run(agg_device_grads)
        feed_dict = {g: 0 for grad_vars in tower_grads for g, _ in grad_vars}
        agg_device_grads_ = sess.run(agg_device_grads, feed_dict)
      else:
        agg_device_grads_ = sess.run(agg_device_grads)
    self.assertEqual(agg_device_grads_, expected_device_grads)

  def _test_grad_aggregation(self, params, num_vars):
    bench = benchmark_cnn.BenchmarkCNN(params)
    deferred_grads = (params.variable_consistency == 'relaxed')
    self._test_grad_aggregation_with_var_mgr(bench.variable_mgr,
                                             bench.num_gpus, num_vars,
                                             deferred_grads)

  def test_grad_aggregation(self):
    base_params = benchmark_cnn.make_params(num_gpus=10,
                                            variable_update='replicated',
                                            use_fp16=True)
    params = base_params
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(variable_consistency='relaxed')
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(compact_gradient_transfer=False)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  variable_consistency='relaxed')
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  compact_gradient_transfer=False)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(variable_consistency='relaxed',
                                  compact_gradient_transfer=False)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  variable_consistency='relaxed',
                                  compact_gradient_transfer=False)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(num_gpus=8, hierarchical_copy=True)
    self._test_grad_aggregation(params, 10)
    # TODO(b/80125832): Enable nccl in tests
    # params = base_params._replace(all_reduce_spec='nccl',
    #                               compact_gradient_transfer=False,
    #                               # For some reason, this test freezes when
    #                               # num_gpus=10
    #                               num_gpus=8)
    # self._test_grad_aggregation(params, 10)
    params = base_params._replace(all_reduce_spec='pscpu')
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(num_gpus=8,
                                  gradient_repacking=3,
                                  variable_consistency='relaxed',
                                  hierarchical_copy=True)
    self._test_grad_aggregation(params, 10)
    # TODO(b/80125832): Enable nccl in tests
    # params = base_params._replace(num_gpus=8,
    #                               gradient_repacking=3,
    #                               variable_consistency='relaxed',
    #                               all_reduce_spec='nccl',
    #                               compact_gradient_transfer=False)
    # self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  variable_consistency='relaxed',
                                  all_reduce_spec='pscpu')
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  variable_consistency='relaxed',
                                  all_reduce_spec='xring')
    self._test_grad_aggregation(params, 10)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util.py
deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for CNN benchmarks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import threading

import numpy as np
import tensorflow.compat.v1 as tf


def tensorflow_version_tuple():
  v = tf.__version__
  major, minor, patch = v.split('.')
  return (int(major), int(minor), patch)


def tensorflow_version():
  vt = tensorflow_version_tuple()
  return vt[0] * 1000 + vt[1]
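
# Illustrative note (not part of the original file): tensorflow_version()
# packs the major and minor versions into one integer. For example, with
# tf.__version__ == '1.15.0', tensorflow_version_tuple() == (1, 15, '0')
# and tensorflow_version() == 1 * 1000 + 15 == 1015.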

def log_fn(log):
  print(log, flush=True)


def roll_numpy_batches(array, batch_size, shift_ratio):
  """Moves a proportion of batches from the start to the end of the array.

  This function moves a proportion of batches, specified by `shift_ratio`,
  from the start of the array to the end. The number of batches moved is
  rounded down to the nearest integer. For example,

  ```
  roll_numpy_batches([1, 2, 3, 4, 5, 6], 2, 0.34) == [3, 4, 5, 6, 1, 2]
  ```

  Args:
    array: A Numpy array whose first dimension is the batch dimension.
    batch_size: The batch size.
    shift_ratio: Proportion of batches to move from the start of the array
      to the end of the array.
  Returns:
    A new Numpy array, with a proportion of the batches at the start of
    `array` moved to the end.
  """
  num_items = array.shape[0]
  assert num_items % batch_size == 0
  num_batches = num_items // batch_size
  starting_batch = int(num_batches * shift_ratio)
  starting_item = starting_batch * batch_size
  return np.roll(array, -starting_item, axis=0)


# For Python 2.7 compatibility, we do not use threading.Barrier.
class Barrier(object):
  """Implements a lightweight Barrier.

  Useful for synchronizing a fixed number of threads at known synchronization
  points. Threads block on 'wait()' and simultaneously return once they have
  all made that call.

  # Implementation adopted from boost/thread/barrier.hpp
  """

  def __init__(self, parties):
    """Create a barrier, initialised to 'parties' threads."""
    self.cond = threading.Condition(threading.Lock())
    self.parties = parties
    # Indicates the number of waiting parties.
    self.waiting = 0
    # generation is needed to deal with spurious wakeups. If self.cond.wait()
    # wakes up for other reasons, generation will force it go back to wait().
    self.generation = 0
    self.broken = False

  def wait(self):
    """Wait for the barrier."""
    with self.cond:
      # Check if the barrier has been disabled or not.
      if self.broken:
        return
      gen = self.generation
      self.waiting += 1
      if self.waiting == self.parties:
        self.waiting = 0
        self.generation += 1
        self.cond.notify_all()
      # loop because of spurious wakeups
      while gen == self.generation:
        self.cond.wait()

  # TODO(huangyp): Remove this method once we find a way to know which step
  # is the last barrier.
  def abort(self):
    """Clear existing barrier and disable this barrier."""
    with self.cond:
      if self.waiting > 0:
        self.generation += 1
        self.cond.notify_all()
      self.broken = True
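
# Example usage (illustrative sketch, not part of the original file): two
# threads repeatedly meet at the barrier; neither can begin iteration i + 1
# until both have finished iteration i.
#
#   barrier = Barrier(2)
#
#   def worker(name):
#     for i in range(3):
#       print('%s finished step %d' % (name, i))
#       barrier.wait()
#
#   threads = [threading.Thread(target=worker, args=(n,)) for n in ('a', 'b')]
#   for t in threads:
#     t.start()
#   for t in threads:
#     t.join()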

class ImageProducer(object):
  """An image producer that puts images into a staging area periodically.

  This class is useful for periodically running a set of ops, `put_ops` on a
  different thread every `batch_group_size` steps.

  The notify_image_consumption() method is used to increment an internal
  counter so that every `batch_group_size` times it is called, `put_ops` is
  executed. A barrier is placed so that notify_image_consumption() will block
  until the previous call to `put_ops` has been executed.

  The start() method is used to start the thread that runs `put_ops`.

  The done() method waits until the last put_ops is executed and stops the
  thread.

  The purpose of this class is to fill an image input pipeline every
  `batch_group_size` steps. Suppose `put_ops` supplies `batch_group_size`
  images to the input pipeline when run, and that every step, 1 batch of
  images is consumed. Then, by calling notify_image_consumption() every step,
  images are supplied to the input pipeline at the same rate they are
  consumed.

  Example usage:
  ```
  put_ops = ...  # Enqueues `batch_group_size` batches to a StagingArea
  get_op = ...   # Dequeues 1 batch, and does some operations on it
  batch_group_size = 4
  with tf.Session() as sess:
    image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size)
    image_producer.start()
    for _ in range(100):
      sess.run(get_op)
      image_producer.notify_image_consumption()
  ```
  """

  def __init__(self, sess, put_ops, batch_group_size, use_python32_barrier):
    self.sess = sess
    self.num_gets = 0
    self.put_ops = put_ops
    self.batch_group_size = batch_group_size
    self.done_event = threading.Event()
    if (use_python32_barrier and
        sys.version_info[0] == 3 and sys.version_info[1] >= 2):
      self.put_barrier = threading.Barrier(2)
    else:
      self.put_barrier = Barrier(2)

  def _should_put(self):
    return (self.num_gets + 1) % self.batch_group_size == 0
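  # Illustrative note (not part of the original file): with
  # batch_group_size=4, _should_put() is True when num_gets is 3, 7, 11, ...,
  # i.e. a put is triggered once per group of 4 consumed batches.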
  def done(self):
    """Stop the image producer."""
    self.done_event.set()
    self.put_barrier.abort()
    self.thread.join()

  def start(self):
    """Start the image producer."""
    self.sess.run([self.put_ops])
    self.thread = threading.Thread(target=self._loop_producer)
    # Set daemon to true to allow Ctrl + C to terminate all threads.
    self.thread.daemon = True
    self.thread.start()

  def notify_image_consumption(self):
    """Increment the counter of image_producer by 1.

    This should only be called by the main thread that consumes images and
    runs the model computation. One batch of images should be consumed
    between calling start() and the first call to this method. Then, one
    batch of images should be consumed between any two successive calls to
    this method.
    """
    if self._should_put():
      self.put_barrier.wait()
    self.num_gets += 1

  def _loop_producer(self):
    while not self.done_event.isSet():
      self.sess.run([self.put_ops])
      self.put_barrier.wait()


class BaseClusterManager(object):
  """The manager for the cluster of servers running the benchmark."""

  def __init__(self, params):
    worker_hosts = params.worker_hosts.split(',')
    ps_hosts = params.ps_hosts.split(',') if params.ps_hosts else []
    cluster = {'worker': worker_hosts}
    if ps_hosts:
      cluster['ps'] = ps_hosts
    self._cluster_spec = tf.train.ClusterSpec(cluster)

  def get_target(self):
    """Returns a target to be passed to tf.Session()."""
    raise NotImplementedError('get_target must be implemented by subclass')

  def join_server(self):
    raise NotImplementedError('join must be implemented by subclass')

  def get_cluster_spec(self):
    return self._cluster_spec

  def num_workers(self):
    return len(self._cluster_spec.job_tasks('worker'))

  def num_ps(self):
    if 'ps' in self._cluster_spec.jobs:
      return len(self._cluster_spec.job_tasks('ps'))
    else:
      return 0


class GrpcClusterManager(BaseClusterManager):
  """A cluster manager for a cluster networked with gRPC."""

  def __init__(self, params, config_proto):
    super(GrpcClusterManager, self).__init__(params)
    if params.job_name == 'controller':
      self._target = 'grpc://%s' % self._cluster_spec.job_tasks('worker')[0]
    else:
      self._server = tf.train.Server(self._cluster_spec,
                                     job_name=params.job_name,
                                     task_index=params.task_index,
                                     config=config_proto,
                                     protocol=params.server_protocol)
      self._target = self._server.target

  def get_target(self):
    return self._target

  def join_server(self):
    return self._server.join()
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util_test.py
deleted 100644 → 0
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_cnn_benchmarks.cnn_util."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import threading
import time

import tensorflow.compat.v1 as tf

import cnn_util


class CnnUtilBarrierTest(tf.test.TestCase):

  def testBarrier(self):
    num_tasks = 20
    num_waits = 4
    barrier = cnn_util.Barrier(num_tasks)
    threads = []
    sync_matrix = []
    for i in range(num_tasks):
      sync_times = [0] * num_waits
      thread = threading.Thread(
          target=self._run_task, args=(barrier, sync_times))
      thread.start()
      threads.append(thread)
      sync_matrix.append(sync_times)
    for thread in threads:
      thread.join()
    for wait_index in range(num_waits - 1):
      # Max of times at iteration i < min of times at iteration i + 1
      self.assertLessEqual(
          max([sync_matrix[i][wait_index] for i in range(num_tasks)]),
          min([sync_matrix[i][wait_index + 1] for i in range(num_tasks)]))

  def _run_task(self, barrier, sync_times):
    for wait_index in range(len(sync_times)):
      sync_times[wait_index] = time.time()
      barrier.wait()

  def testBarrierAbort(self):
    num_tasks = 2
    num_waits = 1
    sync_times = [0] * num_waits
    barrier = cnn_util.Barrier(num_tasks)
    thread = threading.Thread(
        target=self._run_task, args=(barrier, sync_times))
    thread.start()
    barrier.abort()
    # thread won't be blocked by done barrier.
    thread.join()


class ImageProducerTest(tf.test.TestCase):

  def _slow_tensorflow_op(self):
    """Returns a TensorFlow op that takes approximately 0.1s to complete."""
    def slow_func(v):
      time.sleep(0.1)
      return v
    return tf.py_func(slow_func, [tf.constant(0.)], tf.float32).op

  def _test_image_producer(self, batch_group_size, put_slower_than_get):
    # We use the variable x to simulate a staging area of images. x
    # represents the number of batches in the staging area.
    x = tf.Variable(0, dtype=tf.int32)
    if put_slower_than_get:
      put_dep = self._slow_tensorflow_op()
      get_dep = tf.no_op()
    else:
      put_dep = tf.no_op()
      get_dep = self._slow_tensorflow_op()
    with tf.control_dependencies([put_dep]):
      put_op = x.assign_add(batch_group_size, use_locking=True)
    with tf.control_dependencies([get_dep]):
      get_op = x.assign_sub(1, use_locking=True)
    with self.test_session() as sess:
      sess.run(tf.variables_initializer([x]))
      image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size,
                                              use_python32_barrier=False)
      image_producer.start()
      for _ in range(5 * batch_group_size):
        sess.run(get_op)
        # We assert x is nonnegative, to ensure image_producer never causes
        # an unstage op to block. We assert x is at most
        # 2 * batch_group_size, to ensure it doesn't use too much memory by
        # storing too many batches in the staging area.
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)
        image_producer.notify_image_consumption()
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)
      image_producer.done()
      time.sleep(0.1)
      self.assertGreaterEqual(sess.run(x), 0)
      self.assertLessEqual(sess.run(x), 2 * batch_group_size)

  def test_image_producer(self):
    self._test_image_producer(1, False)
    self._test_image_producer(1, True)
    self._test_image_producer(2, False)
    self._test_image_producer(2, True)
    self._test_image_producer(3, False)
    self._test_image_producer(3, True)
    self._test_image_producer(8, False)
    self._test_image_producer(8, True)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/coco_metric.py
deleted 100644 → 0
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""COCO-style evaluation metrics.

Forked from reference model implementation.

COCO API: github.com/cocodataset/cocoapi/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import atexit
import tempfile

from absl import flags
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import six
import tensorflow.compat.v1 as tf

import mlperf
import ssd_constants

FLAGS = flags.FLAGS

# https://github.com/cocodataset/cocoapi/issues/49
if six.PY3:
  import pycocotools.coco
  pycocotools.coco.unicode = str


def async_eval_runner(queue_predictions, queue_results, val_json_file):
  """Load intermediate eval results and get COCO metrics."""
  while True:
    message = queue_predictions.get()
    if message == 'STOP':  # poison pill
      break
    step, predictions = message
    results = compute_map(predictions, val_json_file)
    queue_results.put((step, results))


def compute_map(predictions, val_json_file):
  """Use model predictions to compute mAP.

  Args:
    predictions: a list of tuples returned by the decode_predictions
      function, each containing the following elements:
      image source_id, box coordinates in XYWH order, probability score,
      label
    val_json_file: path to COCO annotation file
  Returns:
    A dictionary that maps all COCO metrics (keys) to their values
  """
  if val_json_file.startswith("gs://"):
    _, local_val_json = tempfile.mkstemp(suffix=".json")
    tf.gfile.Remove(local_val_json)
    tf.gfile.Copy(val_json_file, local_val_json)
    atexit.register(tf.gfile.Remove, local_val_json)
  else:
    local_val_json = val_json_file

  cocoGt = COCO(local_val_json)
  cocoDt = cocoGt.loadRes(np.array(predictions))
  E = COCOeval(cocoGt, cocoDt, iouType='bbox')
  E.evaluate()
  E.accumulate()
  E.summarize()
  print("Current AP: {:.5f}".format(E.stats[0]))
  metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
                  'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']

  # Prefix with "COCO" to group in TensorBoard.
  return {"COCO/" + key: value for key, value in zip(metric_names, E.stats)}

def calc_iou(target, candidates):
  target_tiled = np.tile(target[np.newaxis, :], (candidates.shape[0], 1))
  # Left Top & Right Bottom
  lt = np.maximum(target_tiled[:, :2], candidates[:, :2])
  rb = np.minimum(target_tiled[:, 2:], candidates[:, 2:])

  delta = np.maximum(rb - lt, 0)
  intersect = delta[:, 0] * delta[:, 1]

  # Area of the target box and of each candidate box.
  delta1 = target_tiled[:, 2:] - target_tiled[:, :2]
  area1 = delta1[:, 0] * delta1[:, 1]
  delta2 = candidates[:, 2:] - candidates[:, :2]
  area2 = delta2[:, 0] * delta2[:, 1]

  iou = intersect / (area1 + area2 - intersect)
  return iou
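
# Worked example (illustrative sketch, not part of the original file): boxes
# are given as [min, min, max, max] coordinate pairs in the convention used
# above. A target covering [0, 0, 2, 2] and a candidate covering [1, 1, 3, 3]
# intersect in a 1x1 square, so IoU = 1 / (4 + 4 - 1) = 1/7 ≈ 0.1429:
#
#   calc_iou(np.array([0., 0., 2., 2.]),
#            np.array([[1., 1., 3., 3.]]))  # -> array([0.14285714])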

# TODO(haoyuzhang): Rewrite this NumPy based implementation to TensorFlow
# based implementation under ssd_model.py accuracy_function.
def decode_predictions(labels_and_predictions):
  """Decode predictions and remove unused boxes and labels."""
  predictions = []
  for example in labels_and_predictions:
    source_id = int(example[ssd_constants.SOURCE_ID])
    pred_box = example[ssd_constants.PRED_BOXES]
    pred_scores = example[ssd_constants.PRED_SCORES]

    locs, labels, probs = decode_single(
        pred_box, pred_scores, ssd_constants.OVERLAP_CRITERIA,
        ssd_constants.MAX_NUM_EVAL_BOXES, ssd_constants.MAX_NUM_EVAL_BOXES)

    raw_height, raw_width, _ = example[ssd_constants.RAW_SHAPE]
    for loc, label, prob in zip(locs, labels, probs):
      # Ordering convention differs, hence [1], [0] rather than [0], [1]
      x, y = loc[1] * raw_width, loc[0] * raw_height
      w, h = ((loc[3] - loc[1]) * raw_width,
              (loc[2] - loc[0]) * raw_height)
      predictions.append(
          [source_id, x, y, w, h, prob, ssd_constants.CLASS_INV_MAP[label]])

  mlperf.logger.log(key=mlperf.tags.NMS_THRESHOLD,
                    value=ssd_constants.OVERLAP_CRITERIA)
  mlperf.logger.log(key=mlperf.tags.NMS_MAX_DETECTIONS,
                    value=ssd_constants.MAX_NUM_EVAL_BOXES)
  return predictions


def decode_single(bboxes_in, scores_in, criteria, max_output, max_num=200):
  # Reference to https://github.com/amdegroot/ssd.pytorch
  bboxes_out = []
  scores_out = []
  labels_out = []

  for i, score in enumerate(np.split(scores_in, scores_in.shape[1], 1)):
    score = np.squeeze(score, 1)

    # skip background
    if i == 0:
      continue

    mask = score > ssd_constants.MIN_SCORE
    if not np.any(mask):
      continue

    bboxes, score = bboxes_in[mask, :], score[mask]

    score_idx_sorted = np.argsort(score)
    score_sorted = score[score_idx_sorted]
    score_idx_sorted = score_idx_sorted[-max_num:]
    candidates = []

    # perform non-maximum suppression
    while len(score_idx_sorted):
      idx = score_idx_sorted[-1]
      bboxes_sorted = bboxes[score_idx_sorted, :]
      bboxes_idx = bboxes[idx, :]
      iou = calc_iou(bboxes_idx, bboxes_sorted)

      score_idx_sorted = score_idx_sorted[iou < criteria]
      candidates.append(idx)

    bboxes_out.append(bboxes[candidates, :])
    scores_out.append(score[candidates])
    labels_out.extend([i] * len(candidates))

  if len(scores_out) == 0:
    tf.logging.info("No objects detected. Returning dummy values.")
    return (
        np.zeros(shape=(1, 4), dtype=np.float32),
        np.zeros(shape=(1,), dtype=np.int32),
        np.ones(shape=(1,), dtype=np.float32) * ssd_constants.DUMMY_SCORE,
    )

  bboxes_out = np.concatenate(bboxes_out, axis=0)
  scores_out = np.concatenate(scores_out, axis=0)
  labels_out = np.array(labels_out)

  max_ids = np.argsort(scores_out)[-max_output:]
  return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]
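
# Illustrative usage sketch (not part of the original file): per class,
# decode_single keeps the highest-scoring box and drops every box whose IoU
# with it reaches `criteria`, repeating until no candidates remain. Assuming
# ssd_constants.MIN_SCORE is below 0.8, two heavily-overlapping boxes for
# class 1 collapse to a single detection:
#
#   scores = np.array([[0.0, 0.9],    # column 0 is background, skipped
#                      [0.0, 0.8]], dtype=np.float32)
#   boxes = np.array([[0.1, 0.1, 0.5, 0.5],
#                     [0.1, 0.1, 0.5, 0.45]], dtype=np.float32)
#   decode_single(boxes, scores, criteria=0.5, max_output=10)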
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/constants.py
deleted 100644 → 0
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Constants used in tf_cnn_benchmarks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from enum import Enum

# Results fetched with this prefix will not be reduced. Instead, they will be
# passed as matrices to model's postprocess function.
UNREDUCED_ACCURACY_OP_PREFIX = "tensor:"

# Eval result values with this name prefix will be included in summary.
SIMPLE_VALUE_RESULT_PREFIX = "simple_value:"


class BenchmarkMode(object):
  """Benchmark running mode."""
  TRAIN = "training"
  EVAL = "evaluation"
  TRAIN_AND_EVAL = "training + evaluation"
  FORWARD_ONLY = "forward only"


class NetworkTopology(str, Enum):
  """Network topology describes how multiple GPUs are inter-connected."""

  # DGX-1 uses hybrid cube mesh topology with the following device peer to
  # peer matrix:
  #   DMA: 0 1 2 3 4 5 6 7
  #   0:   Y Y Y Y Y N N N
  #   1:   Y Y Y Y N Y N N
  #   2:   Y Y Y Y N N Y N
  #   3:   Y Y Y Y N N N Y
  #   4:   Y N N N Y Y Y Y
  #   5:   N Y N N Y Y Y Y
  #   6:   N N Y N Y Y Y Y
  #   7:   N N N Y Y Y Y Y
  DGX1 = "dgx1"

  # V100 in GCP are connected with the following device peer to peer matrix.
  # In this topology, bandwidth of the connection depends on if it uses
  # NVLink or PCIe link.
  #   DMA: 0 1 2 3 4 5 6 7
  #   0:   Y Y Y Y N Y N N
  #   1:   Y Y Y Y N N N N
  #   2:   Y Y Y Y N N N Y
  #   3:   Y Y Y Y N N N N
  #   4:   N N N N Y Y Y Y
  #   5:   Y N N N Y Y Y Y
  #   6:   N N N N Y Y Y Y
  #   7:   N N Y N Y Y Y Y
  GCP_V100 = "gcp_v100"
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/convnet_builder.py
deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""CNN builder."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
defaultdict
import
contextlib
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
# pylint: disable=g-direct-tensorflow-import
import
mlperf
from
tensorflow.python.layers
import
convolutional
as
conv_layers
from
tensorflow.python.layers
import
core
as
core_layers
from
tensorflow.python.layers
import
normalization
as
normalization_layers
from
tensorflow.python.layers
import
pooling
as
pooling_layers
from
tensorflow.python.training
import
moving_averages
_data_format_to_channel_axis
=
{
'NCHW'
:
1
,
'NHWC'
:
3
}
class
ConvNetBuilder
(
object
):
"""Builder of cnn net."""
def
__init__
(
self
,
input_op
,
input_nchan
,
phase_train
,
use_tf_layers
,
data_format
=
'NCHW'
,
dtype
=
tf
.
float32
,
variable_dtype
=
tf
.
float32
):
self
.
top_layer
=
input_op
self
.
top_size
=
input_nchan
self
.
phase_train
=
phase_train
self
.
use_tf_layers
=
use_tf_layers
self
.
data_format
=
data_format
self
.
dtype
=
dtype
self
.
variable_dtype
=
variable_dtype
self
.
counts
=
defaultdict
(
lambda
:
0
)
self
.
use_batch_norm
=
False
self
.
batch_norm_config
=
{}
# 'decay': 0.997, 'scale': True}
self
.
channel_pos
=
(
'channels_last'
if
data_format
==
'NHWC'
else
'channels_first'
)
self
.
aux_top_layer
=
None
self
.
aux_top_size
=
0
def
get_custom_getter
(
self
):
"""Returns a custom getter that this class's methods must be called under.
All methods of this class must be called under a variable scope that was
passed this custom getter. Example:
```python
network = ConvNetBuilder(...)
with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
network.conv(...)
# Call more methods of network here
```
Currently, this custom getter only does anything if self.use_tf_layers is
True. In that case, it causes variables to be stored as dtype
self.variable_type, then casted to the requested dtype, instead of directly
storing the variable as the requested dtype.
"""
def
inner_custom_getter
(
getter
,
*
args
,
**
kwargs
):
"""Custom getter that forces variables to have type self.variable_type."""
if
not
self
.
use_tf_layers
:
return
getter
(
*
args
,
**
kwargs
)
requested_dtype
=
kwargs
[
'dtype'
]
if
not
(
requested_dtype
==
tf
.
float32
and
self
.
variable_dtype
==
tf
.
float16
):
# Only change the variable dtype if doing so does not decrease variable
# precision.
kwargs
[
'dtype'
]
=
self
.
variable_dtype
var
=
getter
(
*
args
,
**
kwargs
)
# This if statement is needed to guard the cast, because batch norm
# assigns directly to the return value of this custom getter. The cast
# makes the return value not a variable so it cannot be assigned. Batch
# norm variables are always in fp32 so this if statement is never
# triggered for them.
if
var
.
dtype
.
base_dtype
!=
requested_dtype
:
var
=
tf
.
cast
(
var
,
requested_dtype
)
return
var
return
inner_custom_getter
@
contextlib
.
contextmanager
def
switch_to_aux_top_layer
(
self
):
"""Context that construct cnn in the auxiliary arm."""
if
self
.
aux_top_layer
is
None
:
raise
RuntimeError
(
'Empty auxiliary top layer in the network.'
)
saved_top_layer
=
self
.
top_layer
saved_top_size
=
self
.
top_size
self
.
top_layer
=
self
.
aux_top_layer
self
.
top_size
=
self
.
aux_top_size
yield
self
.
aux_top_layer
=
self
.
top_layer
self
.
aux_top_size
=
self
.
top_size
self
.
top_layer
=
saved_top_layer
self
.
top_size
=
saved_top_size
def
get_variable
(
self
,
name
,
shape
,
dtype
,
cast_dtype
,
*
args
,
**
kwargs
):
# TODO(reedwm): Currently variables and gradients are transferred to other
# devices and machines as type `dtype`, not `cast_dtype`. In particular,
# this means in fp16 mode, variables are transferred as fp32 values, not
# fp16 values, which uses extra bandwidth.
var
=
tf
.
get_variable
(
name
,
shape
,
dtype
,
*
args
,
**
kwargs
)
return
tf
.
cast
(
var
,
cast_dtype
)
def
_conv2d_impl
(
self
,
input_layer
,
num_channels_in
,
filters
,
kernel_size
,
strides
,
padding
,
kernel_initializer
):
if
self
.
use_tf_layers
:
return
conv_layers
.
conv2d
(
input_layer
,
filters
,
kernel_size
,
strides
,
padding
,
self
.
channel_pos
,
kernel_initializer
=
kernel_initializer
,
use_bias
=
False
)
else
:
weights_shape
=
[
kernel_size
[
0
],
kernel_size
[
1
],
num_channels_in
,
filters
]
# We use the name 'conv2d/kernel' so the variable has the same name as its
# tf.layers equivalent. This way, if a checkpoint is written when
# self.use_tf_layers == True, it can be loaded when
# self.use_tf_layers == False, and vice versa.
weights
=
self
.
get_variable
(
'conv2d/kernel'
,
weights_shape
,
self
.
variable_dtype
,
self
.
dtype
,
initializer
=
kernel_initializer
)
if
self
.
data_format
==
'NHWC'
:
strides
=
[
1
]
+
strides
+
[
1
]
else
:
strides
=
[
1
,
1
]
+
strides
return
tf
.
nn
.
conv2d
(
input_layer
,
weights
,
strides
,
padding
,
data_format
=
self
.
data_format
)
  def conv(self,
           num_out_channels,
           k_height,
           k_width,
           d_height=1,
           d_width=1,
           mode='SAME',
           input_layer=None,
           num_channels_in=None,
           use_batch_norm=None,
           stddev=None,
           activation='relu',
           bias=0.0,
           kernel_initializer=None):
    """Construct a conv2d layer on top of cnn."""
    if input_layer is None:
      input_layer = self.top_layer
    if num_channels_in is None:
      num_channels_in = self.top_size
    if stddev is not None and kernel_initializer is None:
      kernel_initializer = tf.truncated_normal_initializer(stddev=stddev)
    if kernel_initializer is None:
      kernel_initializer = tf.variance_scaling_initializer()
    name = 'conv' + str(self.counts['conv'])
    self.counts['conv'] += 1
    with tf.variable_scope(name):
      strides = [1, d_height, d_width, 1]
      if self.data_format == 'NCHW':
        strides = [strides[0], strides[3], strides[1], strides[2]]
      if mode != 'SAME_RESNET':
        conv = self._conv2d_impl(input_layer, num_channels_in,
                                 num_out_channels,
                                 kernel_size=[k_height, k_width],
                                 strides=[d_height, d_width], padding=mode,
                                 kernel_initializer=kernel_initializer)
      else:  # Special padding mode for ResNet models
        if d_height == 1 and d_width == 1:
          conv = self._conv2d_impl(input_layer, num_channels_in,
                                   num_out_channels,
                                   kernel_size=[k_height, k_width],
                                   strides=[d_height, d_width], padding='SAME',
                                   kernel_initializer=kernel_initializer)
        else:
          rate = 1  # Unused (for 'a trous' convolutions)
          kernel_height_effective = k_height + (k_height - 1) * (rate - 1)
          pad_h_beg = (kernel_height_effective - 1) // 2
          pad_h_end = kernel_height_effective - 1 - pad_h_beg
          kernel_width_effective = k_width + (k_width - 1) * (rate - 1)
          pad_w_beg = (kernel_width_effective - 1) // 2
          pad_w_end = kernel_width_effective - 1 - pad_w_beg
          padding = [[0, 0], [pad_h_beg, pad_h_end],
                     [pad_w_beg, pad_w_end], [0, 0]]
          if self.data_format == 'NCHW':
            padding = [padding[0], padding[3], padding[1], padding[2]]
          padded_input_layer = tf.pad(input_layer, padding)
          conv = self._conv2d_impl(padded_input_layer, num_channels_in,
                                   num_out_channels,
                                   kernel_size=[k_height, k_width],
                                   strides=[d_height, d_width],
                                   padding='VALID',
                                   kernel_initializer=kernel_initializer)
      if use_batch_norm is None:
        use_batch_norm = self.use_batch_norm
      mlperf.logger.log_conv2d(input_tensor=input_layer, output_tensor=conv,
                               stride_height=d_height, stride_width=d_width,
                               filters=num_out_channels,
                               initializer=kernel_initializer,
                               use_bias=not use_batch_norm and bias is not None)
      if not use_batch_norm:
        if bias is not None:
          biases = self.get_variable('biases', [num_out_channels],
                                     self.variable_dtype, self.dtype,
                                     initializer=tf.constant_initializer(bias))
          biased = tf.reshape(
              tf.nn.bias_add(conv, biases, data_format=self.data_format),
              conv.get_shape())
        else:
          biased = conv
      else:
        self.top_layer = conv
        self.top_size = num_out_channels
        biased = self.batch_norm(**self.batch_norm_config)
      if activation == 'relu':
        mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
        conv1 = tf.nn.relu(biased)
      elif activation == 'linear' or activation is None:
        conv1 = biased
      elif activation == 'tanh':
        conv1 = tf.nn.tanh(biased)
      else:
        raise KeyError('Invalid activation type \'%s\'' % activation)
      self.top_layer = conv1
      self.top_size = num_out_channels
      return conv1
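The 'SAME_RESNET' branch pads explicitly so strided convolutions match the ResNet paper's symmetric padding rather than TensorFlow's 'SAME' rule, which biases padding toward the bottom/right. A standalone sketch of the arithmetic, with a hypothetical helper name:

# Sketch: explicit ResNet-style padding for a k x k kernel, as computed
# in conv() above. With rate == 1 the effective kernel size equals k.
def resnet_pad_amounts(k, rate=1):
  k_effective = k + (k - 1) * (rate - 1)
  pad_beg = (k_effective - 1) // 2
  pad_end = k_effective - 1 - pad_beg
  return pad_beg, pad_end

# For a 7x7 stride-2 stem convolution, 6 pixels of padding split 3/3:
assert resnet_pad_amounts(7) == (3, 3)
# Even kernels split asymmetrically, favoring the end:
assert resnet_pad_amounts(4) == (1, 2)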
  def _pool(self,
            pool_name,
            pool_function,
            k_height,
            k_width,
            d_height,
            d_width,
            mode,
            input_layer,
            num_channels_in):
    """Construct a pooling layer."""
    if input_layer is None:
      input_layer = self.top_layer
    else:
      self.top_size = num_channels_in
    name = pool_name + str(self.counts[pool_name])
    self.counts[pool_name] += 1
    if self.use_tf_layers:
      pool = pool_function(
          input_layer, [k_height, k_width], [d_height, d_width],
          padding=mode,
          data_format=self.channel_pos,
          name=name)
    else:
      if self.data_format == 'NHWC':
        ksize = [1, k_height, k_width, 1]
        strides = [1, d_height, d_width, 1]
      else:
        ksize = [1, 1, k_height, k_width]
        strides = [1, 1, d_height, d_width]
      pool = tf.nn.max_pool(input_layer, ksize, strides, padding=mode,
                            data_format=self.data_format, name=name)
    if pool_name == 'mpool':
      mlperf.logger.log_max_pool(input_tensor=input_layer, output_tensor=pool)
    self.top_layer = pool
    return pool
  def mpool(self,
            k_height,
            k_width,
            d_height=2,
            d_width=2,
            mode='VALID',
            input_layer=None,
            num_channels_in=None):
    """Construct a max pooling layer."""
    return self._pool('mpool', pooling_layers.max_pooling2d, k_height,
                      k_width, d_height, d_width, mode, input_layer,
                      num_channels_in)
  def apool(self,
            k_height,
            k_width,
            d_height=2,
            d_width=2,
            mode='VALID',
            input_layer=None,
            num_channels_in=None):
    """Construct an average pooling layer."""
    return self._pool('apool', pooling_layers.average_pooling2d, k_height,
                      k_width, d_height, d_width, mode, input_layer,
                      num_channels_in)
  def reshape(self, shape, input_layer=None):
    if input_layer is None:
      input_layer = self.top_layer
    self.top_layer = tf.reshape(input_layer, shape)
    self.top_size = shape[-1]  # HACK This may not always work
    return self.top_layer
  def affine(self,
             num_out_channels,
             input_layer=None,
             num_channels_in=None,
             bias=0.0,
             stddev=None,
             activation='relu'):
    if input_layer is None:
      input_layer = self.top_layer
    if num_channels_in is None:
      num_channels_in = self.top_size
    name = 'affine' + str(self.counts['affine'])
    self.counts['affine'] += 1
    with tf.variable_scope(name):
      init_factor = 2. if activation == 'relu' else 1.
      stddev = stddev or np.sqrt(init_factor / num_channels_in)
      kernel = self.get_variable(
          'weights', [num_channels_in, num_out_channels],
          self.variable_dtype, self.dtype,
          initializer=tf.truncated_normal_initializer(stddev=stddev))
      biases = self.get_variable('biases', [num_out_channels],
                                 self.variable_dtype, self.dtype,
                                 initializer=tf.constant_initializer(bias))
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_DENSE, value=num_out_channels)
      logits = tf.nn.xw_plus_b(input_layer, kernel, biases)
      if activation == 'relu':
        mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
        affine1 = tf.nn.relu(logits, name=name)
      elif activation == 'linear' or activation is None:
        affine1 = logits
      else:
        raise KeyError('Invalid activation type \'%s\'' % activation)
      self.top_layer = affine1
      self.top_size = num_out_channels
      return affine1
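Taken together, conv, mpool, reshape, and affine let a model definition chain layers through the builder's top_layer/top_size bookkeeping: each call consumes the current top layer and replaces it with its own output. A hedged sketch of a hypothetical AlexNet-style stem (cnn is a ConvNetBuilder instance; the flattened size depends on the input resolution and is illustrative):

# Sketch: layer chaining through the builder methods above.
def add_inference(cnn):
  cnn.conv(64, 11, 11, 4, 4)        # 64 filters, 11x11 kernel, stride 4
  cnn.mpool(3, 3, 2, 2)             # 3x3 max pool, stride 2
  cnn.conv(192, 5, 5)               # defaults: stride 1, 'SAME', relu
  cnn.mpool(3, 3, 2, 2)
  cnn.reshape([-1, 192 * 13 * 13])  # flatten; top_size becomes last dim
  cnn.affine(4096)                  # fully connected + relu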
  def inception_module(self, name, cols, input_layer=None, in_size=None):
    if input_layer is None:
      input_layer = self.top_layer
    if in_size is None:
      in_size = self.top_size
    name += str(self.counts[name])
    self.counts[name] += 1
    with tf.variable_scope(name):
      col_layers = []
      col_layer_sizes = []
      for c, col in enumerate(cols):
        col_layers.append([])
        col_layer_sizes.append([])
        for l, layer in enumerate(col):
          ltype, args = layer[0], layer[1:]
          kwargs = {
              'input_layer': input_layer,
              'num_channels_in': in_size
          } if l == 0 else {}
          if ltype == 'conv':
            self.conv(*args, **kwargs)
          elif ltype == 'mpool':
            self.mpool(*args, **kwargs)
          elif ltype == 'apool':
            self.apool(*args, **kwargs)
          elif ltype == 'share':
            # Share matching layer from previous column
            self.top_layer = col_layers[c - 1][l]
            self.top_size = col_layer_sizes[c - 1][l]
          else:
            raise KeyError(
                'Invalid layer type for inception module: \'%s\'' % ltype)
          col_layers[c].append(self.top_layer)
          col_layer_sizes[c].append(self.top_size)
      catdim = 3 if self.data_format == 'NHWC' else 1
      self.top_layer = tf.concat([layers[-1] for layers in col_layers], catdim)
      self.top_size = sum([sizes[-1] for sizes in col_layer_sizes])
      return self.top_layer
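The cols argument is a list of columns, each column a list of (layer_type, *args) tuples; the first layer of every column reads from the module input, and the final layer of each column is concatenated along the channel axis. A hedged example of a GoogLeNet-style mixed block (filter counts are illustrative only; cnn is a ConvNetBuilder instance):

# Sketch: four parallel columns -- 1x1 conv, 1x1->3x3, 1x1->5x5, and
# pool->1x1 -- concatenated on the channel dimension by inception_module.
cols = [
    [('conv', 64, 1, 1)],
    [('conv', 96, 1, 1), ('conv', 128, 3, 3)],
    [('conv', 16, 1, 1), ('conv', 32, 5, 5)],
    [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', 32, 1, 1)],
]
cnn.inception_module('incept', cols)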
  def spatial_mean(self, keep_dims=False):
    name = 'spatial_mean' + str(self.counts['spatial_mean'])
    self.counts['spatial_mean'] += 1
    axes = [1, 2] if self.data_format == 'NHWC' else [2, 3]
    self.top_layer = tf.reduce_mean(
        self.top_layer, axes, keepdims=keep_dims, name=name)
    return self.top_layer
  def dropout(self, keep_prob=0.5, input_layer=None):
    if input_layer is None:
      input_layer = self.top_layer
    else:
      self.top_size = None
    name = 'dropout' + str(self.counts['dropout'])
    with tf.variable_scope(name):
      if not self.phase_train:
        keep_prob = 1.0
      if self.use_tf_layers:
        dropout = core_layers.dropout(input_layer, 1. - keep_prob,
                                      training=self.phase_train)
      else:
        dropout = tf.nn.dropout(input_layer, keep_prob)
      self.top_layer = dropout
      return dropout
  def _batch_norm_without_layers(self, input_layer, decay, use_scale, epsilon):
    """Batch normalization on `input_layer` without tf.layers."""
    # We make this function as similar as possible to
    # tf.contrib.layers.batch_norm, to minimize the differences between using
    # layers and not using layers.
    shape = input_layer.shape
    num_channels = shape[3] if self.data_format == 'NHWC' else shape[1]
    beta = self.get_variable('beta', [num_channels], tf.float32, tf.float32,
                             initializer=tf.zeros_initializer())
    if use_scale:
      gamma = self.get_variable('gamma', [num_channels], tf.float32,
                                tf.float32, initializer=tf.ones_initializer())
    else:
      gamma = tf.constant(1.0, tf.float32, [num_channels])
    # For moving variables, we use tf.get_variable instead of self.get_variable,
    # since self.get_variable returns the result of tf.cast which we cannot
    # assign to.
    moving_mean = tf.get_variable('moving_mean', [num_channels],
                                  tf.float32,
                                  initializer=tf.zeros_initializer(),
                                  trainable=False)
    moving_variance = tf.get_variable('moving_variance', [num_channels],
                                      tf.float32,
                                      initializer=tf.ones_initializer(),
                                      trainable=False)
    if self.phase_train:
      bn, batch_mean, batch_variance = tf.nn.fused_batch_norm(
          input_layer, gamma, beta, epsilon=epsilon,
          data_format=self.data_format, is_training=True)
      mean_update = moving_averages.assign_moving_average(
          moving_mean, batch_mean, decay=decay, zero_debias=False)
      variance_update = moving_averages.assign_moving_average(
          moving_variance, batch_variance, decay=decay, zero_debias=False)
      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, mean_update)
      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, variance_update)
    else:
      bn, _, _ = tf.nn.fused_batch_norm(
          input_layer, gamma, beta, mean=moving_mean,
          variance=moving_variance, epsilon=epsilon,
          data_format=self.data_format, is_training=False)
    return bn
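assign_moving_average applies the standard exponential moving average, so at inference time the frozen statistics approximate the long-run batch statistics. A plain-Python sketch of the update applied to moving_mean and moving_variance (decay = 0.999 matches batch_norm's default below):

# Sketch: the update performed by moving_averages.assign_moving_average
# with zero_debias=False:
#   moving <- moving * decay + batch_value * (1 - decay)
def ema_update(moving, batch_value, decay=0.999):
  return moving * decay + batch_value * (1 - decay)

m = 0.0
for batch_mean in [1.0, 1.0, 1.0]:
  m = ema_update(m, batch_mean)
print(m)  # ~0.002997; converges toward 1.0 over many steps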
  def batch_norm(self, input_layer=None, decay=0.999, scale=False,
                 epsilon=0.001):
    """Adds a Batch Normalization layer."""
    if input_layer is None:
      input_layer = self.top_layer
    else:
      self.top_size = None
    name = 'batchnorm' + str(self.counts['batchnorm'])
    self.counts['batchnorm'] += 1
    center = True
    with tf.variable_scope(name) as scope:
      if self.use_tf_layers:
        layer_obj = normalization_layers.BatchNormalization(
            momentum=decay,
            scale=scale,
            epsilon=epsilon,
            fused=True,
            axis=_data_format_to_channel_axis[self.data_format],
            # We pass this 'scope' argument for compatibility with checkpoints
            # created with the contrib version of batch norm. tf_cnn_benchmarks
            # used to use the contrib version.
            _scope=scope,
            center=center,
            name=scope.name)
        bn = layer_obj.apply(input_layer, training=self.phase_train)
      else:
        bn = self._batch_norm_without_layers(input_layer, decay, scale,
                                             epsilon)
    self.top_layer = bn
    self.top_size = bn.shape[3] if self.data_format == 'NHWC' else bn.shape[1]
    self.top_size = int(self.top_size)
    mlperf.logger.log_batch_norm(
        input_tensor=input_layer, output_tensor=bn, momentum=decay,
        epsilon=epsilon, center=center, scale=scale,
        training=self.phase_train)
    return bn
  def lrn(self, depth_radius, bias, alpha, beta):
    """Adds a local response normalization layer."""
    name = 'lrn' + str(self.counts['lrn'])
    self.counts['lrn'] += 1
    self.top_layer = tf.nn.lrn(
        self.top_layer, depth_radius, bias, alpha, beta, name=name)
    return self.top_layer
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/datasets.py
deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark dataset utilities.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from abc import abstractmethod
import os

import numpy as np
import six
from six.moves import cPickle
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf

from tensorflow.python.platform import gfile
import preprocessing

IMAGENET_NUM_TRAIN_IMAGES = 1281167
IMAGENET_NUM_VAL_IMAGES = 50000
COCO_NUM_TRAIN_IMAGES = 118287
COCO_NUM_VAL_IMAGES = 4952


class Dataset(object):
  """Abstract class for cnn benchmarks dataset."""

  def __init__(self, name, data_dir=None, queue_runner_required=False,
               num_classes=None):
    self.name = name
    self.data_dir = data_dir
    self._queue_runner_required = queue_runner_required
    self._num_classes = num_classes

  def tf_record_pattern(self, subset):
    return os.path.join(self.data_dir, '%s-*-of-*' % subset)

  def reader(self):
    return tf.TFRecordReader()

  @property
  def num_classes(self):
    return self._num_classes

  @num_classes.setter
  def num_classes(self, val):
    self._num_classes = val

  @abstractmethod
  def num_examples_per_epoch(self, subset):
    pass

  def __str__(self):
    return self.name

  def get_input_preprocessor(self, input_preprocessor='default'):
    assert not self.use_synthetic_gpu_inputs()
    return _SUPPORTED_INPUT_PREPROCESSORS[self.name][input_preprocessor]

  def queue_runner_required(self):
    return self._queue_runner_required

  def use_synthetic_gpu_inputs(self):
    return not self.data_dir


class LibrispeechDataset(Dataset):
  """Configuration for LibriSpeech dataset."""

  def __init__(self, data_dir=None):
    super(LibrispeechDataset, self).__init__(
        'librispeech', data_dir, num_classes=29)

  def tf_record_pattern(self, subset):
    if subset == 'train':
      return os.path.join(self.data_dir, 'train-clean-*.tfrecords')
    elif subset == 'validation':
      return os.path.join(self.data_dir, 'test-clean.tfrecords')
    else:
      return ''

  def num_examples_per_epoch(self, subset='train'):
    del subset
    return 2  # TODO(laigd): currently this is an arbitrary number.


class ImageDataset(Dataset):
  """Abstract class for image datasets."""

  def __init__(self, name, height, width, depth=None, data_dir=None,
               queue_runner_required=False, num_classes=1001):
    super(ImageDataset, self).__init__(name, data_dir, queue_runner_required,
                                       num_classes)
    self.height = height
    self.width = width
    self.depth = depth or 3


class ImagenetDataset(ImageDataset):
  """Configuration for Imagenet dataset."""

  def __init__(self, data_dir=None):
    super(ImagenetDataset, self).__init__('imagenet', 300, 300,
                                          data_dir=data_dir)

  def num_examples_per_epoch(self, subset='train'):
    if subset == 'train':
      return IMAGENET_NUM_TRAIN_IMAGES
    elif subset == 'validation':
      return IMAGENET_NUM_VAL_IMAGES
    else:
      raise ValueError('Invalid data subset "%s"' % subset)


class Cifar10Dataset(ImageDataset):
  """Configuration for cifar 10 dataset.

  It will load all the input images into memory.
  """

  def __init__(self, data_dir=None):
    super(Cifar10Dataset, self).__init__('cifar10', 32, 32,
                                         data_dir=data_dir,
                                         queue_runner_required=True,
                                         num_classes=11)

  def read_data_files(self, subset='train'):
    """Reads from data file and returns images and labels in a numpy array."""
    assert self.data_dir, ('Cannot call `read_data_files` when using '
                           'synthetic data')
    if subset == 'train':
      filenames = [
          os.path.join(self.data_dir, 'data_batch_%d' % i)
          for i in xrange(1, 6)
      ]
    elif subset == 'validation':
      filenames = [os.path.join(self.data_dir, 'test_batch')]
    else:
      raise ValueError('Invalid data subset "%s"' % subset)

    inputs = []
    for filename in filenames:
      with gfile.Open(filename, 'rb') as f:
        # python2 does not have the encoding parameter
        encoding = {} if six.PY2 else {'encoding': 'bytes'}
        inputs.append(cPickle.load(f, **encoding))
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    all_images = np.concatenate(
        [each_input[b'data'] for each_input in inputs]).astype(np.float32)
    all_labels = np.concatenate(
        [each_input[b'labels'] for each_input in inputs])
    return all_images, all_labels

  def num_examples_per_epoch(self, subset='train'):
    if subset == 'train':
      return 50000
    elif subset == 'validation':
      return 10000
    else:
      raise ValueError('Invalid data subset "%s"' % subset)


class COCODataset(ImageDataset):
  """Configuration for COCO dataset."""

  def __init__(self, data_dir=None, image_size=300):
    super(COCODataset, self).__init__('coco', image_size, image_size,
                                      data_dir=data_dir, num_classes=81)

  def num_examples_per_epoch(self, subset='train'):
    if subset == 'train':
      return COCO_NUM_TRAIN_IMAGES
    elif subset == 'validation':
      return COCO_NUM_VAL_IMAGES
    else:
      raise ValueError('Invalid data subset "%s"' % subset)


_SUPPORTED_DATASETS = {
    'imagenet': ImagenetDataset,
    'cifar10': Cifar10Dataset,
    'librispeech': LibrispeechDataset,
    'coco': COCODataset,
}

_SUPPORTED_INPUT_PREPROCESSORS = {
    'imagenet': {
        'default': preprocessing.RecordInputImagePreprocessor,
        'official_models_imagenet': preprocessing.ImagenetPreprocessor,
    },
    'cifar10': {
        'default': preprocessing.Cifar10ImagePreprocessor
    },
    'librispeech': {
        'default': preprocessing.LibrispeechPreprocessor
    },
    'coco': {
        'default': preprocessing.COCOPreprocessor
    },
}


def create_dataset(data_dir, data_name):
  """Create a Dataset instance based on data_dir and data_name."""
  if not data_dir and not data_name:
    # When using synthetic data, use synthetic imagenet images by default.
    data_name = 'imagenet'

  # Infer dataset name from data_dir if data_name is not provided.
  if data_name is None:
    for supported_name in _SUPPORTED_DATASETS:
      if supported_name in data_dir:
        data_name = supported_name
        break
    else:  # Failed to identify dataset name from data dir.
      raise ValueError('Could not identify name of dataset. '
                       'Please specify with --data_name option.')
  if data_name not in _SUPPORTED_DATASETS:
    raise ValueError('Unknown dataset. Must be one of %s' % ', '.join(
        [key for key in sorted(_SUPPORTED_DATASETS.keys())]))

  return _SUPPORTED_DATASETS[data_name](data_dir)
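A hedged usage sketch of create_dataset (the data path is a placeholder): the dataset name can be given explicitly or inferred from the directory path, and an empty data_dir selects synthetic inputs.

# Sketch: both calls return an ImagenetDataset; with data_dir=None the
# dataset reports use_synthetic_gpu_inputs() == True.
train_ds = create_dataset('/data/imagenet-2012-tfrecord', None)  # inferred
synth_ds = create_dataset(None, 'imagenet')                      # synthetic
print(train_ds.num_examples_per_epoch('train'))  # 1281167
print(synth_ds.use_synthetic_gpu_inputs())       # True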
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/flags.py
deleted 100644 → 0
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions to define flags and params.
Calling a DEFINE_* function will add a ParamSpec namedtuple to the param_spec
dict. The DEFINE_* arguments match those in absl. Calling define_flags() creates
a command-line flag for every ParamSpec defined by a DEFINE_* functions.
The reason we don't use absl flags directly is that we want to be able to use
tf_cnn_benchmarks as a library. When using it as a library, we don't want to
define any flags, but instead pass parameters to the BenchmarkCNN constructor.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple

from absl import flags as absl_flags
import six

FLAGS = absl_flags.FLAGS

# ParamSpec describes one of benchmark_cnn.BenchmarkCNN's parameters.
ParamSpec = namedtuple(
    '_ParamSpec', ['flag_type', 'default_value', 'description', 'kwargs'])

# Maps from parameter name to its ParamSpec.
param_specs = {}


def DEFINE_string(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  param_specs[name] = ParamSpec('string', default, help, {})


def DEFINE_boolean(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  param_specs[name] = ParamSpec('boolean', default, help, {})


def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None):  # pylint: disable=invalid-name,redefined-builtin
  kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
  param_specs[name] = ParamSpec('integer', default, help, kwargs)


def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None):  # pylint: disable=invalid-name,redefined-builtin
  kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
  param_specs[name] = ParamSpec('float', default, help, kwargs)


def DEFINE_enum(name, default, enum_values, help):  # pylint: disable=invalid-name,redefined-builtin
  kwargs = {'enum_values': enum_values}
  param_specs[name] = ParamSpec('enum', default, help, kwargs)


def DEFINE_list(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  param_specs[name] = ParamSpec('list', default, help, {})


def define_flags(specs=None):
  """Define a command line flag for each ParamSpec in flags.param_specs."""
  specs = specs or param_specs
  define_flag = {
      'boolean': absl_flags.DEFINE_boolean,
      'float': absl_flags.DEFINE_float,
      'integer': absl_flags.DEFINE_integer,
      'string': absl_flags.DEFINE_string,
      'enum': absl_flags.DEFINE_enum,
      'list': absl_flags.DEFINE_list
  }
  for name, param_spec in six.iteritems(specs):
    if param_spec.flag_type not in define_flag:
      raise ValueError('Unknown flag_type %s' % param_spec.flag_type)
    else:
      define_flag[param_spec.flag_type](name, param_spec.default_value,
                                        help=param_spec.description,
                                        **param_spec.kwargs)
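A short sketch of the intended flow: modules call the DEFINE_* wrappers to register ParamSpecs, and a binary that wants real command-line flags calls define_flags() once before parsing. The parameter names here are illustrative, not flags this repo necessarily defines.

# Sketch: register two parameters, then materialize them as absl flags.
DEFINE_integer('batch_size', 32, 'batch size per device', lower_bound=1)
DEFINE_enum('precision', 'fp32', ['fp32', 'fp16'], 'numeric precision')

define_flags()        # creates --batch_size and --precision absl flags
# absl.app.run(main)  # FLAGS.batch_size etc. available after parsing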
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/leading_indicators_test.py
deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark various leading indicators CNNs.
The purpose of these tests is to test each model as a high level baseline and
to ensure the various variable_update options have not regressed. Not all
options are tested. The tests focus on the most viable options.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import ctypes
import logging
import os
import sys

from absl import flags
from absl.testing import absltest  # pylint: disable=unused-import
import tensorflow.compat.v1 as tf

# pylint: disable=g-bad-import-order
import benchmark_cnn
from platforms import util as platforms_util

flags.DEFINE_integer('num_batches', None,
                     'number of batches to run, excluding warmup')


class BenchmarkBase(tf.test.Benchmark):
  """Base class for all benchmarks in this file."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Base class for all benchmarks in this file.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    # Load default values if the benchmark is not run with absl.app.run()
    if not flags.FLAGS.is_parsed():
      flags.FLAGS.mark_as_parsed()

    self.fake_data_dir = os.path.join(platforms_util.get_test_data_dir(),
                                      'fake_tf_record_data')
    self.output_dir = output_dir
    if root_data_dir is None:
      self.data_dir = ('/readahead/200M/placer/prod/home/distbelief/'
                       'imagenet-tensorflow/imagenet-2012-tfrecord')
    else:
      self.data_dir = os.path.join(root_data_dir, 'imagenet')

  def _run_benchmark(self, params):
    """Run a CNN benchmark and report its results.

    Args:
      params: Params tuple, typically created by benchmark_cnn.make_params or
        benchmark_cnn.make_params_from_flags.
    """
    logging.info('Running benchmark [%s]', self._get_name())
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.print_info()
    stats = bench.run()
    extras = {}
    extras['examples_per_sec'] = stats.get('images_per_sec')
    if 'last_average_loss' in stats:
      extras['last_average_loss'] = stats['last_average_loss']
    if 'top_1_accuracy' in stats:
      extras['top_1_accuracy'] = stats['top_1_accuracy']
    if 'top_5_accuracy' in stats:
      extras['top_5_accuracy'] = stats['top_5_accuracy']
    self.report_benchmark(
        iters=stats.get('num_steps'),
        wall_time=stats.get('average_wall_time'),
        extras=extras)

  def _shared_params(self):
    """Returns shared parameters for all benchmarks in this file."""
    params = {}
    if flags.FLAGS.num_batches is not None:
      params['num_batches'] = flags.FLAGS.num_batches
    if self.output_dir is not None:
      params['benchmark_log_dir'] = self.output_dir
    return benchmark_cnn.make_params(**params)

  def _binary_search_batch_size(self, params, init_batch_size):
    """Find the max batch_size using binary search."""
    assert init_batch_size > 0
    low_batch_size = 0
    high_batch_size = None
    batch_size = init_batch_size

    # No need to run a warmup or many batches; if it doesn't OOM after 10
    # batches, it should work in general.
    params = params._replace(num_batches=10, num_warmup_batches=0)

    # Find high_batch_size first.
    tf.logging.info('Looking for upper bound to batch size, starting with %d' %
                    batch_size)
    while high_batch_size is None:
      tf.logging.info('Trying batch_size %d' % batch_size)
      params = params._replace(batch_size=batch_size)
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench.print_info()
      try:
        bench.run()
        low_batch_size = batch_size
        batch_size *= 2
      except tf.errors.ResourceExhaustedError:
        high_batch_size = batch_size - 1

    # Binary Search
    tf.logging.info(
        'Max batch size is in range (%d, %d]. Starting binary search to find '
        'exact max batch size.' % (low_batch_size, batch_size))
    while low_batch_size < high_batch_size:
      batch_size = (low_batch_size + high_batch_size + 1) // 2
      tf.logging.info('Trying batch_size %d' % batch_size)
      params = params._replace(batch_size=batch_size)
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench.print_info()
      try:
        bench.run()
        low_batch_size = batch_size
      except tf.errors.ResourceExhaustedError:
        high_batch_size = batch_size - 1

    self.report_benchmark(extras={'max_batch_size': low_batch_size})
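The same doubling-then-bisect search can be written independently of BenchmarkCNN; a minimal sketch, with a hypothetical oracle fits(batch_size) standing in for "ran 10 batches without a ResourceExhaustedError":

# Sketch: find the largest batch size accepted by `fits`, mirroring
# _binary_search_batch_size above.
def max_batch_size(fits, init_batch_size=128):
  low, high, batch = 0, None, init_batch_size
  while high is None:               # doubling phase: bracket the maximum
    if fits(batch):
      low, batch = batch, batch * 2
    else:
      high = batch - 1
  while low < high:                 # bisection phase
    mid = (low + high + 1) // 2
    if fits(mid):
      low = mid
    else:
      high = mid - 1
  return low

assert max_batch_size(lambda b: b <= 300) == 300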

class Resnet50BenchmarksInferenceCpu(BenchmarkBase):
  """Benchmarks for ResNet50 inference on CPU."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        num_gpus=1,
        model='resnet50',
        num_warmup_batches=5,
        num_batches=50,
        distortions=False,
        forward_only=True,
        device='cpu',
        data_format='NHWC',
        num_intra_threads=0)

  def benchmark_synth_forward_batch1(self):
    """Tests 1 CPU batch size 1."""
    params = self._shared_params()._replace(batch_size=1)
    self._run_benchmark(params)

  def benchmark_synth_forward_batch16(self):
    """Tests 1 CPU batch size 16."""
    params = self._shared_params()._replace(batch_size=16)
    self._run_benchmark(params)


class FrozenResnet50BenchmarksInferenceCpu(Resnet50BenchmarksInferenceCpu):
  """Benchmarks for ResNet50 frozen graph inference on CPU."""

  def _shared_params(self):
    return super(FrozenResnet50BenchmarksInferenceCpu,
                 self)._shared_params()._replace(freeze_when_forward_only=True)


class Resnet50BenchmarksInference(BenchmarkBase):
  """Benchmarks for ResNet50 inference."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        num_gpus=1, model='resnet50', distortions=False, forward_only=True)

  def benchmark_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128."""
    params = self._shared_params()._replace(batch_size=128)
    self._run_benchmark(params)

  def benchmark_fp16_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 FP16."""
    params = self._shared_params()._replace(batch_size=128, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_fp16_synth_forward_batch16(self):
    """Tests 1 GPU batch size 16 FP16."""
    params = self._shared_params()._replace(batch_size=16, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 with XLA."""
    params = self._shared_params()._replace(batch_size=128, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 FP16 with XLA."""
    params = self._shared_params()._replace(
        batch_size=128, use_fp16=True, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_forward_batch16(self):
    """Tests 1 GPU batch size 16 FP16 with XLA."""
    params = self._shared_params()._replace(
        batch_size=16, use_fp16=True, xla=True)
    self._run_benchmark(params)


class FrozenResnet50BenchmarksInference(Resnet50BenchmarksInference):
  """Benchmarks for ResNet50 frozen graph inference."""

  def _shared_params(self):
    return super(FrozenResnet50BenchmarksInference,
                 self)._shared_params()._replace(freeze_when_forward_only=True)

  def benchmark_trt_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128."""
    params = self._shared_params()._replace(batch_size=128, trt_mode='FP32')
    self._run_benchmark(params)

  # TODO(laigd): enable fp16 tests for TF-TRT, it's currently not supported yet.
  # def benchmark_fp16_trt_synth_forward_batch128(self):
  #   """Tests 1 GPU batch size 128 FP16."""
  #   params = self._shared_params()._replace(
  #       batch_size=128, use_fp16=True, trt_mode='FP16')
  #   self._run_benchmark(params)
  #
  # Test with batch size 16 to compare with native TF GPU implementation and
  # XLA.
  # def benchmark_fp16_trt_synth_forward_batch16(self):
  #   """Tests 1 GPU batch size 16 FP16."""
  #   params = self._shared_params()._replace(
  #       batch_size=16, use_fp16=True, trt_mode='FP16')
  #   self._run_benchmark(params)


class Resnet50Benchmarks(BenchmarkBase):
  """Benchmark resnet50 configurations."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50', batch_size=128, distortions=False,
        optimizer='momentum')

  def _shared_params_fp16(self):
    """Returns shared parameters for all ResNet50 FP16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50',
        batch_size=256,
        distortions=False,
        use_fp16=True,
        optimizer='momentum',
        loss_type_to_report='base_loss',
        compute_lr_on_cpu=True,
        single_l2_loss_op=True)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params()._replace(num_gpus=1)
    self._run_benchmark(params)

  def benchmark_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_synth_1gpu_max_batch_size(self):
    """Finds largest batch size that can be run with 1 gpu using synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._binary_search_batch_size(params, init_batch_size=128)

  def benchmark_synth_4gpu_gpureplicated(self):
    """Tests 4 gpu with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=4,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  # FP16 mixed-precision tests.
  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on the gpu."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams_batch128(self):
    """Tests 1 gpu with synthetic data with parameters on the gpu."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_4gpu_gpureplicated(self):
    """Tests 4 gpu with synthetic data with nccl and all_reduce."""
    params = self._shared_params_fp16()._replace(
        num_gpus=4,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic with nccl and all_reduce."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_fake_1gpu_gpuparams(self):
    """Tests 1 gpus with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_fake_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_fakedistort_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake distorted data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        distortions=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of Resnet50 tests only for single GPU.
  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing on guitar.
  def benchmark_ng_xla_batch64_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with XLA, synth data, and batch 64."""
    params = self._shared_params()._replace(
        num_gpus=1, batch_size=64, variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch64_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA, synth data, and batch 64."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=64, variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA, and synth data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_max_batch_size(self):
    """Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._binary_search_batch_size(params, init_batch_size=128)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fakedistort_1gpu_gpuparams(self):
    """Tests 1 gpu with fake distorted data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        distortions=True, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)


class Resnet50v15Benchmarks(BenchmarkBase):
  """Benchmark various ResNet50V1.5 configurations.

  ResNetV1.5 differs from V1 in that stride 2 is used in the first 3x3
  convolution of each block instead of the first 1x1 convolution.
  """

  def _shared_params_fp16(self):
    """Returns shared parameters for all ResNet50v1.5 FP16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50_v1.5',
        batch_size=256,
        distortions=False,
        use_fp16=True,
        optimizer='momentum',
        loss_type_to_report='base_loss',
        compute_lr_on_cpu=True,
        single_l2_loss_op=True)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params_fp16()._replace(num_gpus=1)
    self._run_benchmark(params)

  def benchmark_fp16_batch256_synth_8gpu_gpuparams(self):
    """Tests 8 gpus with synthetic data at batch 256."""
    params = self._shared_params_fp16()._replace(num_gpus=8)
    self._run_benchmark(params)

  def benchmark_fp16_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data at batch 128 (useful for small GPUs)."""
    params = self._shared_params_fp16()._replace(num_gpus=1, batch_size=128)
    self._run_benchmark(params)

  def benchmark_fp16_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fp16_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of Resnet50v1.5 tests.
  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(num_gpus=1, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, batch128, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params_fp16()._replace(num_gpus=1, xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data at batch 128 (useful for small GPUs)."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, num_batches=200, batch_size=128, xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch256_synth_8gpu_gpuparams(self):
    """Tests 8 gpu with synthetic data and xla autojit."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8, num_batches=200, batch_size=256, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla_compile=True)
    self._run_benchmark(params)


class Vgg16Benchmarks(BenchmarkBase):
  """Benchmark various vgg16 configurations."""

  def _shared_params(self):
    """Returns shared parameters for all vgg16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='vgg16', batch_size=128, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        all_reduce_spec='nccl',
        variable_update='replicated',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of VGG16 tests only for single GPU.
  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data, and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)


class TrivialBenchmarks(BenchmarkBase):
  """Benchmarks for trivial model.

  The purpose of these tests is to verify the upper bound for the input
  pipeline. Fake data creates an upper bound on the input pipeline throughput.
  """

  def _shared_params(self):
    """Returns shared parameters for all trivial benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='trivial',
        num_gpus=8,
        distortions=False,
        variable_update='independent',
        data_dir=self.fake_data_dir)

  def benchmark_fake_64batch(self):
    params = self._shared_params()._replace(batch_size=64,
                                            data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fake_128batch(self):
    params = self._shared_params()._replace(batch_size=128,
                                            data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fake_256batch(self):
    params = self._shared_params()._replace(batch_size=256,
                                            data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fakedistort_128batch(self):
    params = self._shared_params()._replace(
        batch_size=128, data_name='imagenet', distortions=True)
    self._run_benchmark(params)


class AlexnetBenchmarks(BenchmarkBase):
  """Benchmarks for alexnet."""

  def _shared_params(self):
    """Returns shared parameters for all alexnet benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='alexnet', batch_size=512, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpus with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fake_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA Benchmark tests for AlexNet.
  def benchmark_xla_synth_1gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpuparams(self):
    """Tests 1 gpu with fake data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpuparams(self):
    """Tests 1 gpu with real data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)


class InceptionV3Benchmarks(BenchmarkBase):
  """Benchmark for InceptionV3."""

  def _shared_params(self):
    """Returns shared parameters for all InceptionV3 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='inception3', batch_size=64, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_1gpu_max_batch_size(self):
    """Finds largest batch size that can be run with 1 gpu using synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._binary_search_batch_size(params, init_batch_size=128)

  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA and synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_max_batch_size(self):
    """Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._binary_search_batch_size(params, init_batch_size=128)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)


class NcfBenchmarks(BenchmarkBase):
  """Benchmarks for neural collaborative filtering."""

  def _shared_params(self):
    return BenchmarkBase._shared_params(self)._replace(
        model='ncf', batch_size=64 * 1024, num_gpus=1, num_warmup_batches=1)

  def benchmark_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla=True, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_compile_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True, use_fp16=True)
    self._run_benchmark(params)


class DeepSpeech2Benchmarks(BenchmarkBase):
  """Benchmarks for DeepSpeech2 model."""

  def _shared_params(self):
    return BenchmarkBase._shared_params(self)._replace(
        model='deepspeech2', batch_size=32, num_gpus=1,
        data_name='librispeech')

  def benchmark_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_xla_compile_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True)
    self._run_benchmark(params)


class SsdBenchmarks(BenchmarkBase):
  """Benchmarks for SSD model."""

  def _cudnn_version(self):
    if sys.platform == 'win32':
      return None
    lib = ctypes.cdll.LoadLibrary(None)
    if hasattr(lib, 'cudnnGetErrorString'):
      version = lib.cudnnGetVersion()
      return version
    return None

  def _shared_params(self):
    cudnn_version = self._cudnn_version()
    if cudnn_version is None or cudnn_version < 7300:
      raise RuntimeError(
          'Needs at least cuDNN 7.3 to work with fp16 (b/112048183). '
          'Build with --define=use_experimental_cudnn=1')
    return BenchmarkBase._shared_params(self)._replace(
        # TODO(b/115672206): Replace backbone model and data dir with
        # replicated placer location for better performance.
        backbone_model_path=platforms_util.get_ssd_backborn_model_file(),  # pylint: disable=line-too-long
        data_dir=platforms_util.get_ssd_backboard_data_dir(),
        batch_size=128,
        data_name='coco',
        model='ssd300',
        num_batches=10,
        num_warmup_batches=1,
        num_gpus=1,
        optimizer='momentum',
        momentum=0.9,
        weight_decay=5e-4,
        loss_type_to_report='base_loss',
        single_l2_loss_op=True,
        compute_lr_on_cpu=True,
    )

  def benchmark_xla_compile_real_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=1,
        xla_compile=True,
    )
    self._run_benchmark(params)

  def benchmark_real_1gpu_gpuparams(self):
    params = self._shared_params()._replace(num_gpus=1,)
    self._run_benchmark(params)

  def benchmark_xla_compile_fp16_real_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=1, xla_compile=True, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_fp16_real_1gpu_gpuparams(self):
    params = self._shared_params()._replace(num_gpus=1, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_compile_real_8gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=8,
        xla_compile=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_real_8gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_xla_compile_fp16_real_8gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=8,
        xla_compile=True,
        use_fp16=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_fp16_real_8gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=8,
        use_fp16=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf.py
deleted 100644 → 0
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions related to MLPerf compliance.
MLPerf requires submissions to log what the benchmark does, in order to verify
that the benchmark meets the MLPerf requirements. This module contains a global
object `logger` that is used by other files to log what tf_cnn_benchmarks does
for compliance.
By default, `logger` does nothing, as the MLPerf compliance logs are verbose and
unnecessary if one is not concerned about MLPerf compliance. The logger can be
enabled by using the `mlperf_logger` context manager.
To enable the logger with `mlperf_logger`, the MLPerf compliance library at
https://github.com/mlperf/training/tree/master/compliance is required. If
the logger is not enabled, the library is not needed.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple
import contextlib
import os
import sys

import tensorflow.compat.v1 as tf

# pylint: disable=g-import-not-at-top
try:
  # Not all users have the MLPerf compliance library, so we don't want to
  # unconditionally crash if these imports fail.
  from mlperf_compliance import mlperf_log
  from mlperf_compliance import resnet_log_helper
  from mlperf_compliance import tags
  import_successful = True
except ImportError:
  # The logger cannot be enabled in this case since the MLPerf library isn't
  # found. We return empty strings from the `tags` attribute so that
  # the benchmark can still run without crashing. These empty tags are passed
  # to an instance of `NullMlPerfLogger`, which does not log anything and
  # ignores the tag values.
  class _Tags(object):
    def __getattr__(self, item):
      return ''
  tags = _Tags()
  import_successful = False
# pylint: enable=g-import-not-at-top


_ModelInfo = namedtuple('_ModelInfo',
                        ['print_fn', 'tag_set', 'mlperf_model_name'])


_MLPERF_LOG_PREFIX = ':::MLPv0.5.0'


class MlPerfLogger(object):
  """Logs various aspects about a benchmark run for MLPerf compliance."""

  def __init__(self, model):
    self._root_dir = os.path.split(os.path.abspath(__file__))[0]
    mlperf_log.ROOT_DIR_RESNET = self._root_dir
    mlperf_log.ROOT_DIR_SSD = self._root_dir
    self.model = model
    model_to_info = {
        'resnet50_v1.5': _ModelInfo(mlperf_log.resnet_print,
                                    mlperf_log.RESNET_TAG_SET, tags.RESNET),
        'ssd300': _ModelInfo(mlperf_log.ssd_print, mlperf_log.SSD_TAG_SET,
                             tags.SSD)
    }
    try:
      self._log_fn, self.tag_set, self.mlperf_model_name = (
          model_to_info[model])
    except KeyError:
      raise ValueError('--ml_perf_compliance_logging is only compatible when '
                       '--model is one of the following: ' +
                       ', '.join(model_to_info.keys()))

  def log(self, key, value=None, stack_offset=2):
    if key in self.tag_set:
      self._log_fn(key, value, stack_offset)
    else:
      print('Ignoring MLPerf logging item key=%s, value=%s for model %s' %
            (key, value, self.model))

  def log_deferred_tensor_value(self, key, tensor_value, global_step,
                                stack_offset=2, every_n=1):
    """Logs the value of a tensor when the graph is run."""
    caller = '(%s)' % mlperf_log.get_caller(stack_offset, self._root_dir)
    def create_print_op():
      return tf.print(_MLPERF_LOG_PREFIX, self.mlperf_model_name,
                      tf.timestamp(), caller, key,
                      ': { "deferred": true, "value":', tensor_value, '}',
                      output_stream=sys.stdout)
    maybe_print = tf.cond(tf.equal(global_step % every_n, 0), create_print_op,
                          tf.no_op)
    with tf.control_dependencies([maybe_print]):
      return tf.identity(tensor_value)

  def log_max_pool(self, input_tensor, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_max_pool(input_tensor, output_tensor)

  def log_begin_block(self, input_tensor, block_type):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_begin_block(input_tensor, block_type)

  def log_end_block(self, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_end_block(output_tensor)

  def log_projection(self, input_tensor, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_projection(input_tensor, output_tensor)

  def log_conv2d(self, input_tensor, output_tensor, stride_height,
                 stride_width, filters, initializer, use_bias):
    """Log a conv2d call."""
    if self.model == 'resnet50_v1.5':
      assert stride_height == stride_width, (
          '--ml_perf_compliance_logging does not support convolutions where '
          'the stride height is not equal to the stride width. '
          'stride_height=%d, stride_width=%d' % (stride_height, stride_width))
      if isinstance(initializer, tf.truncated_normal_initializer) or (
          isinstance(initializer, tf.variance_scaling_initializer) and
          initializer.distribution == 'truncated_normal'):
        initializer = tags.TRUNCATED_NORMAL
      elif (isinstance(initializer, tf.glorot_uniform_initializer) or
            initializer is None):
        initializer = 'glorot_uniform'
      resnet_log_helper.log_conv2d(input_tensor, output_tensor, stride_width,
                                   filters, initializer, use_bias)

  def log_batch_norm(self, input_tensor, output_tensor, momentum, epsilon,
                     center, scale, training):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_batch_norm(input_tensor, output_tensor, momentum,
                                       epsilon, center, scale, training)

  def log_train_epochs(self, num_epochs):
    """Logs all the TRAIN_EPOCHs log lines."""
    num_epochs_int = int(num_epochs)
    for i in range(num_epochs_int):
      # MLPerf allows us to print all the train epochs at once instead of
      # printing them as we do them.
      self.log(key=mlperf_log.TRAIN_EPOCH, value=i, stack_offset=3)
    if num_epochs_int != num_epochs:
      value = (str(num_epochs_int) +
               ', but this epoch only has {}% of the examples of a normal '
               'epoch'.format(100 * (num_epochs - num_epochs_int)))
      self.log(key=mlperf_log.TRAIN_EPOCH, value=value, stack_offset=3)

  def log_input_resize_aspect_preserving(self, height, width, scale_factor):
    assert height == width, (
        '--ml_perf_compliance_logging does not support models with nonsquare '
        'images. Cannot process image with height=%d and width=%d' %
        (height, width))
    self.log(key=tags.INPUT_RESIZE_ASPECT_PRESERVING,
             value={'min': int(height * scale_factor)})

  def log_eval_epoch(self, tag, global_step, batch_size, stack_offset=2):
    if self.model == 'resnet50_v1.5':
      self.log(key=tag, stack_offset=stack_offset + 1)
    elif self.model == 'ssd300':
      epoch = int(global_step * batch_size / 118287)
      self.log(key=tag, value=epoch, stack_offset=stack_offset + 1)

  def log_eval_accuracy(self, accuracy, global_step, batch_size,
                        examples_per_epoch, stack_offset=2):
    """Logs eval accuracy."""
    epoch = int(global_step * batch_size / examples_per_epoch)
    eval_accuracy = {'epoch': epoch, 'value': accuracy}
    eval_iteration_accuracy = {'iteration': global_step, 'value': accuracy}
    self.log(key=tags.EVAL_ACCURACY, value=eval_accuracy,
             stack_offset=stack_offset + 1)
    self.log(key=tags.EVAL_ITERATION_ACCURACY,
             value=eval_iteration_accuracy, stack_offset=stack_offset + 1)


def _empty_fn(*args, **kwargs):
  del args, kwargs


class NullMlPerfLogger(object):
  """A version of `MlPerfLogger` that does not log anything.

  This class has the same interface as `MlPerfLogger`, but does not actually
  do anything. This is used when logging is disabled, which is the default
  behavior.
  """

  def __getattr__(self, item):
    return _empty_fn

  def log_deferred_tensor_value(self, key, tensor_value, *args, **kwargs):
    del key, args, kwargs
    return tensor_value


# A global singleton logger. By default, it's the null logger but can be
# switched to an MlPerfLogger with `mlperf_logger()`.
logger = NullMlPerfLogger()


@contextlib.contextmanager
def mlperf_logger(use_mlperf_logger, model):
  """Optionally enable the mlperf logger.

  If `use_mlperf_logger` is True, sets the `logger` global variable to an
  instance of MlPerfLogger that will print logs for MLPerf compliance. If
  `use_mlperf_logger` is False, does nothing.

  Args:
    use_mlperf_logger: If True, enables the mlperf logger. If False, this
      function does nothing.
    model: The model that will be logged. Required, because different models
      must log different things for MLPerf compliance.

  Yields:
    Nothing.

  Raises:
    ImportError: If `use_mlperf_logger` is True but the MLPerf compliance
      library cannot be imported
  """
  global logger
  if use_mlperf_logger:
    if not import_successful:
      raise ImportError('Failed to import MLPerf compliance library, which is '
                        'required when --ml_perf_compliance_logging is '
                        'specified. Clone this repo and add this directory '
                        'https://github.com/mlperf/training/tree/master/'
                        'compliance to the PYTHONPATH environmental variable.')
    logger_ = MlPerfLogger(model)
    old_logger = logger
    try:
      logger = logger_
      yield
    finally:
      logger = old_logger
  else:
    yield
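A usage sketch for the module above, assuming the mlperf_compliance library is on the PYTHONPATH and this file is importable as `mlperf`: inside the context manager the global `logger` is swapped to an `MlPerfLogger`, and outside it stays the `NullMlPerfLogger`, so the same logging call becomes a no-op.

import mlperf

with mlperf.mlperf_logger(use_mlperf_logger=True, model='resnet50_v1.5'):
    # Inside the context: printed as an MLPerf compliance log line.
    mlperf.logger.log(key=mlperf.tags.RUN_START)
# Outside the context: silently ignored by NullMlPerfLogger.
mlperf.logger.log(key=mlperf.tags.RUN_START)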
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf_test.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains tests related to MLPerf.
Note this test only passes if the MLPerf compliance library is installed.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import Counter
import logging
import re

import six
import tensorflow.compat.v1 as tf

import benchmark_cnn
import datasets
import mlperf
import test_util
from models import model
from mlperf_compliance import mlperf_log


class _MlPerfTestModel(model.CNNModel):
  """A model to test the MLPerf compliance logging on."""

  def __init__(self):
    super(_MlPerfTestModel, self).__init__(
        'mlperf_test_model', image_size=224, batch_size=2, learning_rate=1)

  def add_inference(self, cnn):
    assert cnn.top_layer.shape[1:] == (3, 224, 224)
    cnn.conv(1, 1, 1, 1, 1, use_batch_norm=True)
    cnn.mpool(1, 1, 1, 1, num_channels_in=1)
    cnn.reshape([-1, 224 * 224])
    cnn.affine(1, activation=None)

    # Assert that the batch norm variables are filtered out for L2 loss.
    variables = tf.global_variables() + tf.local_variables()
    assert len(variables) > len(self.filter_l2_loss_vars(variables))


class MlPerfComplianceTest(tf.test.TestCase):
  """Tests the MLPerf compliance logs.

  This serves as a quick check that we probably didn't break the compliance
  logging. It is not meant to be as comprehensive as the official MLPerf
  compliance checker will be.
  """

  def setUp(self):
    super(MlPerfComplianceTest, self).setUp()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  # Map between regex and the number of times we expect to see that regex in
  # the logs. Entries commented out with the comment FIXME indicate that
  # tf_cnn_benchmarks currently fails compliance in that regard, and needs to
  # be fixed to be MLPerf compliant.
  EXPECTED_LOG_REGEXES = {
      # Preprocessing tags
      mlperf.tags.INPUT_ORDER: 2,  # 1 for training, 1 for eval
      # We pass --tf_random_seed=9876 in the test.
      r'%s: 9876' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      # The Numpy random seed is hardcoded to 4321.
      r'%s: 4321' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_TRAIN_EXAMPLES,
                   datasets.IMAGENET_NUM_TRAIN_IMAGES): 1,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES,
                   datasets.IMAGENET_NUM_VAL_IMAGES): 1,
      mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS + '.*': 1,
      mlperf.tags.INPUT_RANDOM_FLIP + '.*': 1,
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_CENTRAL_CROP: 1,
      r'%s: \[123.68, 116.78, 103.94\].*' %
      mlperf.tags.INPUT_MEAN_SUBTRACTION: 2,
      r'%s: {"min": 256}.*' % mlperf.tags.INPUT_RESIZE_ASPECT_PRESERVING: 1,
      # 1 for training, 1 for eval
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_RESIZE: 2,

      # Resnet model tags
      mlperf.tags.MODEL_HP_BATCH_NORM + '.*': 2,
      # 2 for training, 2 for eval. Although there's only 1 conv2d, each
      # conv2d produces 2 logs.
      mlperf.tags.MODEL_HP_CONV2D_FIXED_PADDING + '.*': 4,
      mlperf.tags.MODEL_HP_RELU + '.*': 2,
      mlperf.tags.MODEL_HP_INITIAL_MAX_POOL + '.*': 2,
      mlperf.tags.MODEL_HP_DENSE + '.*': 4,
      # Note that tags our test model does not emit, like
      # MODEL_HP_SHORTCUT_ADD, are omitted here.
      r'%s: "categorical_cross_entropy".*' % mlperf.tags.MODEL_HP_LOSS_FN: 1,

      # 1 for training, 2 because the _MlPerfTestModel calls this when
      # building the model for both training and eval
      r'%s: true' % mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2: 3,

      r'%s: 0.5.*' % mlperf.tags.MODEL_L2_REGULARIZATION: 1,

      # Note we do not handle OPT_LR, since that is printed to stderr using
      # tf.Print, which we cannot easily intercept.

      # Other tags
      '%s: "%s"' % (mlperf.tags.OPT_NAME, mlperf.tags.SGD_WITH_MOMENTUM): 1,
      '%s: 0.5' % mlperf.tags.OPT_MOMENTUM: 1,
      mlperf.tags.RUN_START: 1,
      '%s: 2' % mlperf.tags.INPUT_BATCH_SIZE: 1,
      mlperf.tags.TRAIN_LOOP: 1,
      mlperf.tags.TRAIN_EPOCH + '.*': 1,
      '%s: 2' % mlperf.tags.INPUT_SIZE: 2,
      mlperf.tags.EVAL_START: 2,
      mlperf.tags.EVAL_STOP: 2,
      '%s: 6' % mlperf.tags.EVAL_SIZE: 2,
      mlperf.tags.EVAL_ACCURACY + '.*': 2,
      '%s: 2.0' % mlperf.tags.EVAL_TARGET: 2,
      mlperf.tags.RUN_STOP + '.*': 1,
      mlperf.tags.RUN_FINAL: 1
  }
  EXPECTED_LOG_REGEXES = Counter({re.compile(k): v
                                  for k, v in EXPECTED_LOG_REGEXES.items()})

  def testMlPerfCompliance(self):
    string_io = six.StringIO()
    handler = logging.StreamHandler(string_io)
    data_dir = test_util.create_black_and_white_images()
    try:
      mlperf_log.LOGGER.addHandler(handler)
      params = benchmark_cnn.make_params(data_dir=data_dir,
                                         data_name='imagenet',
                                         batch_size=2,
                                         num_warmup_batches=0,
                                         num_batches=2,
                                         num_eval_batches=3,
                                         eval_during_training_every_n_steps=1,
                                         distortions=False,
                                         weight_decay=0.5,
                                         optimizer='momentum',
                                         momentum=0.5,
                                         stop_at_top_1_accuracy=2.0,
                                         tf_random_seed=9876,
                                         ml_perf=True)
      with mlperf.mlperf_logger(use_mlperf_logger=True,
                                model='resnet50_v1.5'):
        bench_cnn = benchmark_cnn.BenchmarkCNN(params,
                                               model=_MlPerfTestModel())
        bench_cnn.run()
      logs = string_io.getvalue().splitlines()
      log_regexes = Counter()
      for log in logs:
        for regex in self.EXPECTED_LOG_REGEXES:
          if regex.search(log):
            log_regexes[regex] += 1
      if log_regexes != self.EXPECTED_LOG_REGEXES:
        diff_counter = Counter(log_regexes)
        diff_counter.subtract(self.EXPECTED_LOG_REGEXES)
        differences = []
        for regex in (k for k in diff_counter.keys() if diff_counter[k]):
          found_count = log_regexes[regex]
          expected_count = self.EXPECTED_LOG_REGEXES[regex]
          differences.append('  For regex %s: Found %d lines matching but '
                             'expected to find %d' %
                             (regex.pattern, found_count, expected_count))
        raise AssertionError('Logs did not match expected logs. '
                             'Differences:\n%s' % '\n'.join(differences))
    finally:
      mlperf_log.LOGGER.removeHandler(handler)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
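The verification technique in `testMlPerfCompliance` above is generic: count how many log lines match each expected regex, then diff two `Counter`s to surface only the mismatches. A standalone sketch with made-up log lines and expectations:

import re
from collections import Counter

expected = Counter({re.compile(r'run_start'): 1,
                    re.compile(r'eval_accuracy.*'): 2})
logs = [':::MLPv0.5.0 run_start',
        ':::MLPv0.5.0 eval_accuracy {"value": 0.1}']

found = Counter()
for line in logs:
    for regex in expected:
        if regex.search(line):
            found[regex] += 1

# subtract() keeps zero and negative counts, so nonzero entries are exactly
# the regexes whose observed count differs from the expected count.
diff = Counter(found)
diff.subtract(expected)
mismatches = [r.pattern for r in diff if diff[r]]
print(mismatches)  # ['eval_accuracy.*'] -- one match found, two expected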
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/alexnet_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Alexnet model configuration.
References:
Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton
ImageNet Classification with Deep Convolutional Neural Networks
Advances in Neural Information Processing Systems. 2012
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.compat.v1 as tf

from models import model


class AlexnetModel(model.CNNModel):
  """Alexnet cnn model."""

  def __init__(self, params=None):
    super(AlexnetModel, self).__init__(
        'alexnet', 224 + 3, 512, 0.005, params=params)

  def add_inference(self, cnn):
    # Note: VALID requires padding the images by 3 in width and height
    cnn.conv(64, 11, 11, 4, 4, 'VALID')
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(192, 5, 5)
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(384, 3, 3)
    cnn.conv(384, 3, 3)
    cnn.conv(256, 3, 3)
    cnn.mpool(3, 3, 2, 2)
    cnn.reshape([-1, 256 * 6 * 6])
    cnn.affine(4096)
    cnn.dropout()
    cnn.affine(4096)
    cnn.dropout()


class AlexnetCifar10Model(model.CNNModel):
  """Alexnet cnn model for cifar datasets.

  The model architecture follows the one defined in the tensorflow tutorial
  model.
  Reference model: tensorflow/models/tutorials/image/cifar10/cifar10.py
  Paper: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf
  """

  def __init__(self, params=None):
    super(AlexnetCifar10Model, self).__init__(
        'alexnet', 32, 128, 0.1, params=params)

  def add_inference(self, cnn):
    cnn.conv(64, 5, 5, 1, 1, 'SAME', stddev=5e-2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    cnn.conv(64, 5, 5, 1, 1, 'SAME', bias=0.1, stddev=5e-2)
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    shape = cnn.top_layer.get_shape().as_list()
    flat_dim = shape[1] * shape[2] * shape[3]
    cnn.reshape([-1, flat_dim])
    cnn.affine(384, stddev=0.04, bias=0.1)
    cnn.affine(192, stddev=0.04, bias=0.1)

  def get_learning_rate(self, global_step, batch_size):
    num_examples_per_epoch = 50000
    num_epochs_per_decay = 100
    decay_steps = (num_epochs_per_decay * num_examples_per_epoch //
                   batch_size)
    decay_factor = 0.1
    return tf.train.exponential_decay(
        self.learning_rate, global_step, decay_steps, decay_factor,
        staircase=True)
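A quick check of the schedule that `get_learning_rate` above defines: with 50000 examples per epoch, 100 epochs per decay and batch_size=128, the decay interval is 39062 steps, and with staircase=True the rate drops by 10x at each interval boundary. The helper below is a sketch of the equivalent arithmetic, not the TF op itself:

def staircase_exponential_decay(lr, global_step, decay_steps, decay_factor):
    # Same arithmetic as tf.train.exponential_decay(..., staircase=True).
    return lr * decay_factor ** (global_step // decay_steps)

decay_steps = 100 * 50000 // 128  # 39062
print(staircase_exponential_decay(0.1, 0, decay_steps, 0.1))          # 0.1
print(staircase_exponential_decay(0.1, 39062, decay_steps, 0.1))      # 0.01
print(staircase_exponential_decay(0.1, 2 * 39062, decay_steps, 0.1))  # ~0.001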
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/densenet_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Densenet model configuration.
References:
"Densely Connected Convolutional Networks": https://arxiv.org/pdf/1608.06993
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf

from models import model as model_lib


class DensenetCifar10Model(model_lib.CNNModel):
  """Densenet cnn network configuration."""

  def __init__(self, model, layer_counts, growth_rate, params=None):
    self.growth_rate = growth_rate
    super(DensenetCifar10Model, self).__init__(
        model, 32, 64, 0.1, layer_counts=layer_counts, params=params)
    self.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}

  def dense_block(self, cnn, growth_rate):
    input_layer = cnn.top_layer
    c = cnn.batch_norm(input_layer, **self.batch_norm_config)
    c = tf.nn.relu(c)
    c = cnn.conv(growth_rate, 3, 3, 1, 1,
                 stddev=np.sqrt(2.0 / 9 / growth_rate),
                 activation=None, input_layer=c)
    channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
    cnn.top_layer = tf.concat([input_layer, c], channel_index)
    cnn.top_size += growth_rate

  def transition_layer(self, cnn):
    in_size = cnn.top_size
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    cnn.conv(in_size, 1, 1, 1, 1, stddev=np.sqrt(2.0 / 9 / in_size))
    cnn.apool(2, 2, 2, 2)

  def add_inference(self, cnn):
    if self.layer_counts is None:
      raise ValueError('Layer counts not specified for %s' %
                       self.get_model())
    if self.growth_rate is None:
      raise ValueError('Growth rate not specified for %s' % self.get_model())

    cnn.conv(16, 3, 3, 1, 1, activation=None)
    # Block 1
    for _ in xrange(self.layer_counts[0]):
      self.dense_block(cnn, self.growth_rate)
    self.transition_layer(cnn)
    # Block 2
    for _ in xrange(self.layer_counts[1]):
      self.dense_block(cnn, self.growth_rate)
    self.transition_layer(cnn)
    # Block 3
    for _ in xrange(self.layer_counts[2]):
      self.dense_block(cnn, self.growth_rate)
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
    cnn.top_size = cnn.top_layer.get_shape().as_list()[channel_index]
    cnn.spatial_mean()

  def get_learning_rate(self, global_step, batch_size):
    num_batches_per_epoch = 50000 // batch_size
    boundaries = num_batches_per_epoch * np.array([150, 225, 300],
                                                  dtype=np.int64)
    boundaries = [x for x in boundaries]
    values = [0.1, 0.01, 0.001, 0.0001]
    return tf.train.piecewise_constant(global_step, boundaries, values)


def create_densenet40_k12_model():
  return DensenetCifar10Model('densenet40_k12', (12, 12, 12), 12)


def create_densenet100_k12_model():
  return DensenetCifar10Model('densenet100_k12', (32, 32, 32), 12)


def create_densenet100_k24_model():
  return DensenetCifar10Model('densenet100_k24', (32, 32, 32), 24)
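A shape-only sketch of the dense-block concatenation above: every `dense_block` call appends `growth_rate` channels to the running feature map, so after n blocks the channel count is the initial count plus n times the growth rate. For densenet40_k12, one stage of 12 blocks on top of the initial 16-channel conv gives 160 channels entering the first transition layer:

initial_channels = 16
growth_rate = 12
channels = initial_channels
for _ in range(12):  # one dense-block stage of densenet40_k12
    channels += growth_rate  # tf.concat adds growth_rate channels per block
print(channels)  # 160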
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DeepSpeech2 model configuration.
References:
https://arxiv.org/abs/1512.02595
Deep Speech 2: End-to-End Speech Recognition in English and Mandarin
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf

import constants
from cnn_util import log_fn
from models import model as model_lib
from tensorflow.python.ops import variables  # pylint: disable=g-direct-tensorflow-import


class DeepSpeechDecoder(object):
  """Greedy decoder implementation for Deep Speech model."""

  def __init__(self, labels, blank_index=28):
    """Decoder initialization.

    Args:
      labels: a string specifying the speech labels for the decoder to use.
      blank_index: an integer specifying index for the blank character.
        Defaults to 28.
    """
    self.labels = labels
    self.blank_index = blank_index
    self.int_to_char = dict([(i, c) for (i, c) in enumerate(labels)])

  def convert_to_string(self, sequence):
    """Convert a sequence of indexes into corresponding string."""
    return ''.join([self.int_to_char[i] for i in sequence])

  def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences
    after tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    try:
      from nltk.metrics import distance  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      if 'nltk.metrics' not in e.message:
        raise
      raise ImportError('To use the experimental deepspeech model, you must '
                        'pip install -U nltk')

    # Map each word to a new char.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))

    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]

    return distance.edit_distance(''.join(new_decode), ''.join(new_target))

  def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    try:
      from nltk.metrics import distance  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      if 'nltk.metrics' not in e.message:
        raise
      raise ImportError('To use the experimental deepspeech model, you must '
                        'pip install -U nltk')

    return distance.edit_distance(decode, target)

  def decode(self, char_indexes):
    """Decode the best guess from logits using greedy algorithm."""
    # Merge repeated chars.
    merge = [k for k, _ in itertools.groupby(char_indexes)]
    # Remove the blank index in the decoded sequence.
    merge_remove_blank = []
    for k in merge:
      if k != self.blank_index:
        merge_remove_blank.append(k)
    return self.convert_to_string(merge_remove_blank)

  def decode_logits(self, logits):
    """Decode the best guess from logits using greedy algorithm."""
    # Choose the class with maximum probability.
    best = list(np.argmax(logits, axis=1))
    return self.decode(best)


class DeepSpeech2Model(model_lib.Model):
  """Define DeepSpeech2 model."""

  # Supported rnn cells.
  SUPPORTED_RNNS = {
      'lstm': tf.nn.rnn_cell.BasicLSTMCell,
      'rnn': tf.nn.rnn_cell.RNNCell,
      'gru': tf.nn.rnn_cell.GRUCell,
  }

  # Parameters for batch normalization.
  BATCH_NORM_EPSILON = 1e-5
  BATCH_NORM_DECAY = 0.997

  # Filters of convolution layer
  CONV_FILTERS = 32

  def __init__(self,
               num_rnn_layers=5,
               rnn_type='lstm',
               is_bidirectional=True,
               rnn_hidden_size=800,
               use_bias=True,
               params=None):
    """Initialize DeepSpeech2 model.

    Args:
      num_rnn_layers: an integer, the number of rnn layers (default: 5).
      rnn_type: a string, one of the supported rnn cells: gru, rnn or lstm.
      is_bidirectional: a boolean to indicate if the rnn layer is
        bidirectional.
      rnn_hidden_size: an integer for the number of hidden units in the RNN
        cell.
      use_bias: a boolean specifying whether to use a bias in the last fc
        layer.
      params: the params from BenchmarkCNN.
    """
    super(DeepSpeech2Model, self).__init__(
        'deepspeech2',
        batch_size=128,
        learning_rate=0.0005,
        fp16_loss_scale=128,
        params=params)
    self.num_rnn_layers = num_rnn_layers
    self.rnn_type = rnn_type
    self.is_bidirectional = is_bidirectional
    self.rnn_hidden_size = rnn_hidden_size
    self.use_bias = use_bias

    self.num_feature_bins = 161
    self.max_time_steps = 3494
    self.max_label_length = 576

  def _batch_norm(self, inputs, training):
    """Batch normalization layer.

    Note that the momentum to use will affect validation accuracy over time.
    Batch norm has different behaviors during training/evaluation. With a
    large momentum, the model takes longer to get a near-accurate estimation
    of the moving mean/variance over the entire training dataset, which means
    we need more iterations to see good evaluation results. If the training
    data is evenly distributed over the feature space, we can also try
    setting a smaller momentum (such as 0.1) to get good evaluation result
    sooner.

    Args:
      inputs: input data for batch norm layer.
      training: a boolean to indicate if it is in training stage.

    Returns:
      tensor output from batch norm layer.
    """
    return tf.layers.batch_normalization(
        inputs=inputs,
        momentum=DeepSpeech2Model.BATCH_NORM_DECAY,
        epsilon=DeepSpeech2Model.BATCH_NORM_EPSILON,
        fused=True,
        training=training)

  def _conv_bn_layer(self, inputs, padding, filters, kernel_size, strides,
                     layer_id, training):
    """Defines 2D convolutional + batch normalization layer.

    Args:
      inputs: input data for convolution layer.
      padding: padding to be applied before convolution layer.
      filters: an integer, number of output filters in the convolution.
      kernel_size: a tuple specifying the height and width of the 2D
        convolution window.
      strides: a tuple specifying the stride length of the convolution.
      layer_id: an integer specifying the layer index.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output from the current layer.
    """
    # Perform symmetric padding on the feature dimension of time_step.
    # This step is required to avoid issues when RNN output sequence is
    # shorter than the label length.
    inputs = tf.pad(
        inputs,
        [[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]])
    inputs = tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='valid',
        use_bias=False,
        activation=tf.nn.relu6,
        name='cnn_{}'.format(layer_id))
    return self._batch_norm(inputs, training)

  def _rnn_layer(self, inputs, rnn_cell, rnn_hidden_size, layer_id,
                 use_batch_norm, is_bidirectional, training):
    """Defines a batch normalization + rnn layer.

    Args:
      inputs: input tensors for the current layer.
      rnn_cell: RNN cell instance to use.
      rnn_hidden_size: an integer for the dimensionality of the rnn output
        space.
      layer_id: an integer for the index of current layer.
      use_batch_norm: a boolean specifying whether to perform batch
        normalization on input states.
      is_bidirectional: a boolean specifying whether the rnn layer is
        bi-directional.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output for the current layer.
    """
    if use_batch_norm:
      inputs = self._batch_norm(inputs, training)

    # Construct forward/backward RNN cells.
    fw_cell = rnn_cell(num_units=rnn_hidden_size,
                       name='rnn_fw_{}'.format(layer_id))
    if is_bidirectional:
      bw_cell = rnn_cell(num_units=rnn_hidden_size,
                         name='rnn_bw_{}'.format(layer_id))
      outputs, _ = tf.nn.bidirectional_dynamic_rnn(
          cell_fw=fw_cell,
          cell_bw=bw_cell,
          inputs=inputs,
          dtype=tf.float32,
          swap_memory=True)
      rnn_outputs = tf.concat(outputs, -1)
    else:
      rnn_outputs = tf.nn.dynamic_rnn(
          fw_cell, inputs, dtype=tf.float32, swap_memory=True)
    return rnn_outputs

  def get_input_data_types(self, subset):
    """Returns the list of data types of the inputs."""
    del subset  # Same data types for both train and validation subsets.
    return [self.data_type, tf.int32, tf.int32, tf.int32]

  def get_input_shapes(self, subset):
    """Returns the list of shapes of the padded inputs."""
    del subset  # Same shapes for both train and validation subsets
    return [
        [self.batch_size, self.max_time_steps, self.num_feature_bins, 1],
        [self.batch_size, self.max_label_length],
        [self.batch_size, 1],
        [self.batch_size, 1],
    ]

  def get_synthetic_inputs(self, input_name, nclass):
    inputs = tf.random_uniform(self.get_input_shapes('train')[0],
                               dtype=self.get_input_data_types('train')[0])
    inputs = variables.VariableV1(inputs, trainable=False,
                                  collections=[tf.GraphKeys.LOCAL_VARIABLES],
                                  name=input_name)
    labels = tf.convert_to_tensor(
        np.random.randint(28, size=[self.batch_size, self.max_label_length]))
    input_lengths = tf.convert_to_tensor(
        [self.max_time_steps] * self.batch_size)
    label_lengths = tf.convert_to_tensor(
        [self.max_label_length] * self.batch_size)
    return [inputs, labels, input_lengths, label_lengths]

  # TODO(laigd): support fp16.
  # TODO(laigd): support multiple gpus.
  def build_network(self, inputs, phase_train=True, nclass=29):
    """Builds the forward pass of the deepspeech2 model.

    Args:
      inputs: The input list of the model.
      phase_train: True during training. False during evaluation.
      nclass: Number of classes that the input spectrogram can belong to.

    Returns:
      A BuildNetworkResult which contains the logits and model-specific extra
        information.
    """
    inputs = inputs[0]  # Get the spectrogram feature.

    # Two cnn layers.
    inputs = self._conv_bn_layer(
        inputs, padding=(20, 5), filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(41, 11), strides=(2, 2), layer_id=1,
        training=phase_train)

    inputs = self._conv_bn_layer(
        inputs, padding=(10, 5), filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(21, 11), strides=(2, 1), layer_id=2,
        training=phase_train)

    # output of conv_layer2 with the shape of
    # [batch_size (N), times (T), features (F), channels (C)].
    # Convert the conv output to rnn input.
    # batch_size = tf.shape(inputs)[0]
    feat_size = inputs.get_shape().as_list()[2]
    inputs = tf.reshape(
        inputs,
        [self.batch_size, -1, feat_size * DeepSpeech2Model.CONV_FILTERS])

    # RNN layers.
    rnn_cell = DeepSpeech2Model.SUPPORTED_RNNS[self.rnn_type]
    for layer_counter in xrange(self.num_rnn_layers):
      # No batch normalization on the first layer.
      use_batch_norm = (layer_counter != 0)
      inputs = self._rnn_layer(
          inputs, rnn_cell, self.rnn_hidden_size, layer_counter + 1,
          use_batch_norm, self.is_bidirectional, phase_train)

    # FC layer with batch norm.
    inputs = self._batch_norm(inputs, phase_train)
    logits = tf.layers.dense(inputs, nclass, use_bias=self.use_bias)

    return model_lib.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    """Computes the ctc loss for the current batch of predictions.

    Args:
      inputs: the input list of the model.
      build_network_result: a BuildNetworkResult returned by build_network().

    Returns:
      The loss tensor of the model.
    """
    logits = build_network_result.logits
    actual_time_steps = inputs[2]
    probs = tf.nn.softmax(logits)
    ctc_time_steps = tf.shape(probs)[1]
    ctc_input_length = tf.to_float(
        tf.multiply(actual_time_steps, ctc_time_steps))
    ctc_input_length = tf.to_int32(
        tf.floordiv(ctc_input_length, tf.to_float(self.max_time_steps)))

    label_length = inputs[3]
    label_length = tf.to_int32(tf.squeeze(label_length))
    ctc_input_length = tf.to_int32(tf.squeeze(ctc_input_length))

    labels = inputs[1]
    sparse_labels = tf.to_int32(
        tf.keras.backend.ctc_label_dense_to_sparse(labels, label_length))
    y_pred = tf.log(
        tf.transpose(probs, perm=[1, 0, 2]) + tf.keras.backend.epsilon())

    losses = tf.expand_dims(
        tf.nn.ctc_loss(
            labels=sparse_labels,
            inputs=y_pred,
            sequence_length=ctc_input_length,
            ignore_longer_outputs_than_inputs=True),
        axis=1)
    loss = tf.reduce_mean(losses)
    return loss

  PROBABILITY_TENSOR = 'deepspeech2_prob'
  LABEL_TENSOR = 'deepspeech2_label'

  def accuracy_function(self, inputs, logits):
    """Returns the ops to evaluate the model performance."""
    # Get probabilities of each predicted class
    probs = tf.nn.softmax(logits)
    assert probs.shape.as_list()[0] == self.batch_size
    return {
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.PROBABILITY_TENSOR):
            probs,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.LABEL_TENSOR):
            inputs[1],
    }

  def postprocess(self, results):
    """Postprocess results returned from model in Python."""
    probs = results[self.PROBABILITY_TENSOR]

    total_wer, total_cer = 0, 0
    speech_labels = " abcdefghijklmnopqrstuvwxyz'-"
    greedy_decoder = DeepSpeechDecoder(speech_labels)

    # Evaluate the performance using WER (Word Error Rate) and CER (Character
    # Error Rate) as metrics.
    targets = results[self.LABEL_TENSOR]  # The ground truth transcript
    for i in range(self.batch_size):
      # Decode string.
      predicted_str = greedy_decoder.decode_logits(probs[i])
      expected_str = greedy_decoder.decode(targets[i])
      # Compute CER.
      total_cer += (greedy_decoder.cer(predicted_str, expected_str) /
                    len(expected_str))
      # Compute WER.
      total_wer += (greedy_decoder.wer(predicted_str, expected_str) /
                    len(expected_str.split()))

    # Get mean value
    total_cer /= self.batch_size
    total_wer /= self.batch_size

    log_fn('total CER: {:f}; total WER: {:f}; total example: {:d}.'.format(
        total_cer, total_wer, self.batch_size))
    # TODO(laigd): get rid of top_N_accuracy bindings in benchmark_cnn.py
    return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
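`DeepSpeechDecoder.decode` above implements greedy CTC decoding: collapse runs of repeated indices, then drop the blank index. A pure-Python sketch using the same 29-character label string and blank index as the model:

import itertools

labels = " abcdefghijklmnopqrstuvwxyz'-"
blank_index = 28

def greedy_ctc_decode(char_indexes):
    merged = [k for k, _ in itertools.groupby(char_indexes)]  # collapse repeats
    return ''.join(labels[i] for i in merged if i != blank_index)

# 'h h e <blank> l l <blank> l o' decodes to "hello"; the blank between the
# two l-runs is what keeps the double letter from being collapsed away.
print(greedy_ctc_decode([8, 8, 5, 28, 12, 12, 28, 12, 15]))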
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/official_ncf_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrap the official recommendation model in a tf_cnn_benchmarks Model.
This allows the recommendation NCF model to be used in tf_cnn_benchmarks.
Currently, the implementation is fairly hacky, because tf_cnn_benchmarks is
intended to be used only with CNNs.
Only synthetic data with 1 GPU is currently supported.
"""
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.compat.v1 as tf

from models import model

# Obtained by running the official NCF model with the following command:
# python ncf_main.py --dataset ml-20m
# and printing the number of users and items here:
# https://github.com/tensorflow/models/blob/d089975f630a8a01be63e45ef08a31be14bb96b4/official/recommendation/data_preprocessing.py#L68
_NUM_USERS_20M = 138493
_NUM_ITEMS_20M = 26744


# TODO(reedwm): Support multi-GPU. Currently keras layers, which this model
# uses, ignore variable_scopes, which we rely on for multi-GPU support.
# TODO(reedwm): Support real data. This will require a significant refactor.
# TODO(reedwm): All-reduce IndexedSlices more effectively.
# TODO(reedwm): Support the 1M variant of this model.


class NcfModel(model.Model):
  r"""A model.Model wrapper around the official NCF recommendation model.

  To do an NCF run with synthetic data that roughly matches what the official
  model does, run:

  python tf_cnn_benchmarks.py --optimizer=adam --model=ncf --batch_size=65536 \
      --weight_decay=0 --sparse_to_dense_grads
  """

  def __init__(self, params=None):
    super(NcfModel, self).__init__(
        'official_ncf', batch_size=2048, learning_rate=0.0005,
        fp16_loss_scale=128, params=params)
    if self.fp16_vars:
      raise ValueError('NCF model only supports float32 variables for now.')

  def build_network(self, inputs, phase_train=True, nclass=1001):
    try:
      from official.recommendation import neumf_model  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      if 'neumf_model' not in e.message:
        raise
      raise ImportError('To use the experimental NCF model, you must clone '
                        'the repo https://github.com/tensorflow/models and '
                        'add tensorflow/models to the PYTHONPATH.')
    del nclass

    users, items, _ = inputs
    params = {
        'num_users': _NUM_USERS_20M,
        'num_items': _NUM_ITEMS_20M,
        'model_layers': (256, 256, 128, 64),
        'mf_dim': 64,
        'mf_regularization': 0,
        'mlp_reg_layers': (0, 0, 0, 0),
        'use_tpu': False
    }
    user_input = tf.keras.layers.Input(tensor=users, name='user_input')
    item_input = tf.keras.layers.Input(tensor=items, name='item_input')
    if self.data_type == tf.float32:
      keras_model = neumf_model.construct_model(user_input, item_input,
                                                params)
      logits = keras_model.output
    else:
      assert self.data_type == tf.float16
      old_floatx = tf.keras.backend.floatx()
      try:
        tf.keras.backend.set_floatx('float16')
        # We cannot rely on the variable_scope's fp16 custom getter here,
        # because the NCF model uses keras layers, which ignore variable
        # scopes. So we use a variable_creator_scope instead.
        with tf.variable_creator_scope(_fp16_variable_creator):
          keras_model = neumf_model.construct_model(user_input, item_input,
                                                    params)
        logits = tf.cast(keras_model.output, tf.float32)
      finally:
        tf.keras.backend.set_floatx(old_floatx)
    return model.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    logits = build_network_result.logits

    # Softmax with the first column of ones is equivalent to sigmoid.
    # TODO(reedwm): Actually, the first column should be zeros to be
    # equivalent to sigmoid. But, we keep it at ones to match the official
    # models.
    logits = tf.concat([tf.ones(logits.shape, dtype=logits.dtype), logits],
                       axis=1)

    return tf.losses.sparse_softmax_cross_entropy(labels=inputs[2],
                                                  logits=logits)

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns the ops to generate synthetic inputs and labels."""
    def users_init_val():
      return tf.random_uniform((self.batch_size, 1), minval=0,
                               maxval=_NUM_USERS_20M, dtype=tf.int32)
    users = tf.Variable(users_init_val, dtype=tf.int32, trainable=False,
                        collections=[tf.GraphKeys.LOCAL_VARIABLES],
                        name='synthetic_users')
    def items_init_val():
      return tf.random_uniform((self.batch_size, 1), minval=0,
                               maxval=_NUM_ITEMS_20M, dtype=tf.int32)
    items = tf.Variable(items_init_val, dtype=tf.int32, trainable=False,
                        collections=[tf.GraphKeys.LOCAL_VARIABLES],
                        name='synthetic_items')
    def labels_init_val():
      return tf.random_uniform((self.batch_size,), minval=0, maxval=2,
                               dtype=tf.int32)
    labels = tf.Variable(labels_init_val, dtype=tf.int32, trainable=False,
                         collections=[tf.GraphKeys.LOCAL_VARIABLES],
                         name='synthetic_labels')
    return [users, items, labels]

  def get_input_shapes(self, subset):
    del subset
    return [[self.batch_size, 1], [self.batch_size, 1], [self.batch_size]]

  def get_input_data_types(self, subset):
    del subset
    return [tf.int32, tf.int32, tf.int32]


def _fp16_variable_creator(next_creator, **kwargs):
  """Variable creator to create variables in fp32 and cast them to fp16."""
  dtype = kwargs.get('dtype', None)
  initial_value = kwargs.get('initial_value', None)
  if dtype is None:
    if initial_value is not None and not callable(initial_value):
      dtype = initial_value.dtype
  if dtype == tf.float16:
    if callable(initial_value):
      new_initial_value = lambda: tf.cast(initial_value(), tf.float32)
    else:
      new_initial_value = tf.cast(initial_value, tf.float32)
    kwargs['dtype'] = tf.float32
    kwargs['initial_value'] = new_initial_value
    var = next_creator(**kwargs)
    return tf.cast(var, dtype=tf.float16)
  else:
    return next_creator(**kwargs)
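The TODO in `loss_function` above can be checked numerically: prepending a zeros column makes the two-class softmax match sigmoid exactly, while the ones column actually used shifts the logit by 1, i.e. softmax([1, z])[1] == sigmoid(z - 1). A NumPy sketch of both identities:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())  # subtract max for numerical stability
    return e / e.sum()

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

z = 0.7
print(softmax(np.array([0.0, z]))[1], sigmoid(z))        # equal
print(softmax(np.array([1.0, z]))[1], sigmoid(z - 1.0))  # equal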
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/googlenet_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Googlenet model configuration.
References:
Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich
Going deeper with convolutions
arXiv preprint arXiv:1409.4842 (2014)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from models import model


class GooglenetModel(model.CNNModel):
  """GoogLeNet."""

  def __init__(self, params=None):
    super(GooglenetModel, self).__init__(
        'googlenet', 224, 32, 0.005, params=params)

  def add_inference(self, cnn):
    def inception_v1(cnn, k, l, m, n, p, q):
      cols = [[('conv', k, 1, 1)],
              [('conv', l, 1, 1), ('conv', m, 3, 3)],
              [('conv', n, 1, 1), ('conv', p, 5, 5)],
              [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)]]
      cnn.inception_module('incept_v1', cols)

    cnn.conv(64, 7, 7, 2, 2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.conv(64, 1, 1)
    cnn.conv(192, 3, 3)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    inception_v1(cnn, 64, 96, 128, 16, 32, 32)
    inception_v1(cnn, 128, 128, 192, 32, 96, 64)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    inception_v1(cnn, 192, 96, 208, 16, 48, 64)
    inception_v1(cnn, 160, 112, 224, 24, 64, 64)
    inception_v1(cnn, 128, 128, 256, 24, 64, 64)
    inception_v1(cnn, 112, 144, 288, 32, 64, 64)
    inception_v1(cnn, 256, 160, 320, 32, 128, 128)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    inception_v1(cnn, 256, 160, 320, 32, 128, 128)
    inception_v1(cnn, 384, 192, 384, 48, 128, 128)
    cnn.apool(7, 7, 1, 1, mode='VALID')
    cnn.reshape([-1, 1024])
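A shape-only sketch for the `inception_v1` helper above: the four parallel columns end in convs with k, m, p and q output channels respectively, and `inception_module` concatenates them along the channel axis, so the module's output depth is k + m + p + q (l and n only size the 1x1 bottlenecks inside the columns). The two calls below reproduce the classic GoogLeNet 3a/3b depths:

def inception_v1_output_channels(k, l, m, n, p, q):
    del l, n  # bottleneck widths do not appear in the concatenated output
    return k + m + p + q

print(inception_v1_output_channels(64, 96, 128, 16, 32, 32))    # 256
print(inception_v1_output_channels(128, 128, 192, 32, 96, 64))  # 480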
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/inception_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Inception model configuration.
Includes multiple models: inception3, inception4, inception-resnet2.
References:
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
Inception-v4, Inception-ResNet and the Impact of Residual Connections on
Learning
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich
Going Deeper with Convolutions
http://arxiv.org/pdf/1409.4842v1.pdf
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
Zbigniew Wojna
Rethinking the Inception Architecture for Computer Vision
arXiv preprint arXiv:1512.00567 (2015)
Inception v3 model: http://arxiv.org/abs/1512.00567
Inception v4 and Resnet V2 architectures: http://arxiv.org/abs/1602.07261
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from six.moves import xrange  # pylint: disable=redefined-builtin
from models import model


class Inceptionv3Model(model.CNNModel):
  """InceptionV3."""

  def __init__(self, auxiliary=False, params=None):
    self._auxiliary = auxiliary
    super(Inceptionv3Model, self).__init__(
        'inception3', 299, 32, 0.005, params=params)

  def add_inference(self, cnn):
    def inception_v3_a(cnn, n):
      cols = [[('conv', 64, 1, 1)],
              [('conv', 48, 1, 1), ('conv', 64, 5, 5)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', n, 1, 1)]]
      cnn.inception_module('incept_v3_a', cols)

    def inception_v3_b(cnn):
      cols = [[('conv', 384, 3, 3, 2, 2, 'VALID')],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3),
               ('conv', 96, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_b', cols)

    def inception_v3_c(cnn, n):
      cols = [[('conv', 192, 1, 1)],
              [('conv', n, 1, 1), ('conv', n, 1, 7), ('conv', 192, 7, 1)],
              [('conv', n, 1, 1), ('conv', n, 7, 1), ('conv', n, 1, 7),
               ('conv', n, 7, 1), ('conv', 192, 1, 7)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_c', cols)

    def inception_v3_d(cnn):
      cols = [[('conv', 192, 1, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 192, 7, 1),
               ('conv', 192, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_d', cols)

    def inception_v3_e(cnn, pooltype):
      cols = [[('conv', 320, 1, 1)],
              [('conv', 384, 1, 1), ('conv', 384, 1, 3)],
              [('share',), ('conv', 384, 3, 1)],
              [('conv', 448, 1, 1), ('conv', 384, 3, 3),
               ('conv', 384, 1, 3)],
              [('share',), ('share',), ('conv', 384, 3, 1)],
              [('mpool' if pooltype == 'max' else 'apool', 3, 3, 1, 1,
                'SAME'), ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_e', cols)

    def incept_v3_aux(cnn):
      assert cnn.aux_top_layer is None
      cnn.aux_top_layer = cnn.top_layer
      cnn.aux_top_size = cnn.top_size
      with cnn.switch_to_aux_top_layer():
        cnn.apool(5, 5, 3, 3, mode='VALID')
        cnn.conv(128, 1, 1, mode='SAME')
        cnn.conv(768, 5, 5, mode='VALID', stddev=0.01)
        cnn.reshape([-1, 768])

    cnn.use_batch_norm = True
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')   # 299 x 299 x 3
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')   # 149 x 149 x 32
    cnn.conv(64, 3, 3, 1, 1, mode='SAME')    # 147 x 147 x 64
    cnn.mpool(3, 3, 2, 2, mode='VALID')      # 147 x 147 x 64
    cnn.conv(80, 1, 1, 1, 1, mode='VALID')   # 73 x 73 x 80
    cnn.conv(192, 3, 3, 1, 1, mode='VALID')  # 71 x 71 x 192
    cnn.mpool(3, 3, 2, 2, 'VALID')           # 35 x 35 x 192
    inception_v3_a(cnn, 32)                  # 35 x 35 x 256 mixed.
    inception_v3_a(cnn, 64)                  # 35 x 35 x 288 mixed_1.
    inception_v3_a(cnn, 64)                  # 35 x 35 x 288 mixed_2
    inception_v3_b(cnn)                      # 17 x 17 x 768 mixed_3
    inception_v3_c(cnn, 128)                 # 17 x 17 x 768 mixed_4
    inception_v3_c(cnn, 160)                 # 17 x 17 x 768 mixed_5
    inception_v3_c(cnn, 160)                 # 17 x 17 x 768 mixed_6
    inception_v3_c(cnn, 192)                 # 17 x 17 x 768 mixed_7
    if self._auxiliary:
      incept_v3_aux(cnn)                     # Auxiliary Head logits
    inception_v3_d(cnn)                      # 17 x 17 x 1280 mixed_8
    inception_v3_e(cnn, 'avg')               # 8 x 8 x 2048 mixed_9
    inception_v3_e(cnn, 'max')               # 8 x 8 x 2048 mixed_10
    cnn.apool(8, 8, 1, 1, 'VALID')           # 8 x 8 x 2048
    cnn.reshape([-1, 2048])                  # 1 x 1 x 2048


# Stem functions
def inception_v4_sa(cnn):
  cols = [[('mpool', 3, 3, 2, 2, 'VALID')],
          [('conv', 96, 3, 3, 2, 2, 'VALID')]]
  cnn.inception_module('incept_v4_sa', cols)


def inception_v4_sb(cnn):
  cols = [[('conv', 64, 1, 1), ('conv', 96, 3, 3, 1, 1, 'VALID')],
          [('conv', 64, 1, 1), ('conv', 64, 7, 1), ('conv', 64, 1, 7),
           ('conv', 96, 3, 3, 1, 1, 'VALID')]]
  cnn.inception_module('incept_v4_sb', cols)


def inception_v4_sc(cnn):
  cols = [[('conv', 192, 3, 3, 2, 2, 'VALID')],
          [('mpool', 3, 3, 2, 2, 'VALID')]]
  cnn.inception_module('incept_v4_sc', cols)


# Reduction functions
def inception_v4_ra(cnn, k, l, m, n):
  cols = [
      [('mpool', 3, 3, 2, 2, 'VALID')],
      [('conv', n, 3, 3, 2, 2, 'VALID')],
      [('conv', k, 1, 1), ('conv', l, 3, 3),
       ('conv', m, 3, 3, 2, 2, 'VALID')]
  ]
  cnn.inception_module('incept_v4_ra', cols)


def inception_v4_rb(cnn):
  cols = [[('mpool', 3, 3, 2, 2, 'VALID')],
          [('conv', 192, 1, 1), ('conv', 192, 3, 3, 2, 2, 'VALID')],
          [('conv', 256, 1, 1), ('conv', 256, 1, 7), ('conv', 320, 7, 1),
           ('conv', 320, 3, 3, 2, 2, 'VALID')]]
  cnn.inception_module('incept_v4_rb', cols)


class Inceptionv4Model(model.CNNModel):
  """Inceptionv4."""

  def __init__(self, params=None):
    super(Inceptionv4Model, self).__init__(
        'inception4', 299, 32, 0.005, params=params)

  def add_inference(self, cnn):
    def inception_v4_a(cnn):
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 96, 1, 1)],
              [('conv', 96, 1, 1)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)]]
      cnn.inception_module('incept_v4_a', cols)

    def inception_v4_b(cnn):
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 128, 1, 1)],
              [('conv', 384, 1, 1)],
              [('conv', 192, 1, 1), ('conv', 224, 1, 7),
               ('conv', 256, 7, 1)],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 224, 7, 1),
               ('conv', 224, 1, 7), ('conv', 256, 7, 1)]]
      cnn.inception_module('incept_v4_b', cols)

    def inception_v4_c(cnn):
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 256, 1, 1)],
              [('conv', 256, 1, 1)],
              [('conv', 384, 1, 1), ('conv', 256, 1, 3)],
              [('share',), ('conv', 256, 3, 1)],
              [('conv', 384, 1, 1), ('conv', 448, 1, 3),
               ('conv', 512, 3, 1), ('conv', 256, 3, 1)],
              [('share',), ('share',), ('share',), ('conv', 256, 1, 3)]]
      cnn.inception_module('incept_v4_c', cols)

    cnn.use_batch_norm = True
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')
    cnn.conv(64, 3, 3)
    inception_v4_sa(cnn)
    inception_v4_sb(cnn)
    inception_v4_sc(cnn)
    for _ in xrange(4):
      inception_v4_a(cnn)
    inception_v4_ra(cnn, 192, 224, 256, 384)
    for _ in xrange(7):
      inception_v4_b(cnn)
    inception_v4_rb(cnn)
    for _ in xrange(3):
      inception_v4_c(cnn)
    cnn.spatial_mean()
    cnn.dropout(0.8)
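The spatial sizes annotated in `Inceptionv3Model.add_inference` above follow from the 'VALID' padding rule: the output size is ceil((in - kernel + 1) / stride). A sketch reproducing the first two steps of the stem (the exact rounding convention is assumed to match TF's VALID padding):

import math

def valid_out(size, kernel, stride):
    return math.ceil((size - kernel + 1) / stride)

size = 299
size = valid_out(size, 3, 2)  # conv 3x3 stride 2 -> 149
size = valid_out(size, 3, 1)  # conv 3x3 stride 1 -> 147
print(size)  # 147, matching the '147 x 147 x 64' annotation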
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/lenet_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Lenet model configuration.
References:
LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner
Gradient-based learning applied to document recognition
Proceedings of the IEEE (1998)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from models import model


class Lenet5Model(model.CNNModel):
  """Lenet5."""

  def __init__(self, params=None):
    super(Lenet5Model, self).__init__('lenet5', 28, 32, 0.005, params=params)

  def add_inference(self, cnn):
    # Note: This matches TF's MNIST tutorial model
    cnn.conv(32, 5, 5)
    cnn.mpool(2, 2)
    cnn.conv(64, 5, 5)
    cnn.mpool(2, 2)
    cnn.reshape([-1, 64 * 7 * 7])
    cnn.affine(512)
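A quick check of the reshape above, assuming the convs use SAME padding (as in the MNIST tutorial model this mirrors): a 28x28 input through two 2x2 stride-2 max pools ends at 7x7 with 64 channels, hence the flattened width of 64 * 7 * 7 = 3136.

size = 28
for _ in range(2):  # two mpool(2, 2) layers, each halving the spatial size
    size //= 2
print(size, 64 * size * size)  # 7 3136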