ModelZoo / ResNet50_tensorflow

Commit cbf29854
Authored Sep 12, 2019 by Ayush Dubey; committed by A. Unique TensorFlower, Sep 12, 2019

Add Keras-based ResNet50 multi-worker accuracy tests.

PiperOrigin-RevId: 268703439
Parent: 34d59895

1 changed file with 85 additions and 2 deletions:

official/benchmark/keras_imagenet_benchmark.py (+85 −2)
@@ -966,6 +966,89 @@ class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
         log_steps=FLAGS.log_steps)
 
 
+class Resnet50MultiWorkerKerasAccuracy(keras_benchmark.KerasBenchmark):
+  """Resnet50 distributed accuracy tests with multiple workers."""
+
+  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
+    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]
+    self.data_dir = os.path.join(root_data_dir, 'imagenet')
+    super(Resnet50MultiWorkerKerasAccuracy, self).__init__(
+        output_dir=output_dir, flag_methods=flag_methods)
+
+  def _benchmark_common(self, eager, num_workers, all_reduce_alg):
+    """Common to all benchmarks in this class."""
+    self._setup()
+
+    num_gpus = 8
+    FLAGS.num_gpus = num_gpus
+    FLAGS.data_dir = self.data_dir
+    FLAGS.train_epochs = 90
+    FLAGS.epochs_between_evals = 10
+    FLAGS.dtype = 'fp16'
+    FLAGS.enable_eager = eager
+    FLAGS.enable_xla = False
+    FLAGS.distribution_strategy = 'multi_worker_mirrored'
+    FLAGS.use_tensor_lr = True
+    FLAGS.tf_gpu_thread_mode = 'gpu_private'
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_{}_8_gpu_{}_worker_fp16_{}_tweaked'.format(
+            'eager' if eager else 'graph', num_workers, all_reduce_alg))
+    FLAGS.batch_size = 256 * num_gpus * num_workers
+    FLAGS.all_reduce_alg = all_reduce_alg
+
+    self._run_and_report_benchmark()
+
+  def _run_and_report_benchmark(self,
+                                top_1_min=MIN_TOP_1_ACCURACY,
+                                top_1_max=MAX_TOP_1_ACCURACY):
+    start_time_sec = time.time()
+    stats = resnet_imagenet_main.run(flags.FLAGS)
+    wall_time_sec = time.time() - start_time_sec
+
+    super(Resnet50MultiWorkerKerasAccuracy, self)._report_benchmark(
+        stats,
+        wall_time_sec,
+        top_1_min=top_1_min,
+        top_1_max=top_1_max,
+        total_batch_size=FLAGS.batch_size,
+        log_steps=100)
+
+  def _get_model_dir(self, folder_name):
+    return os.path.join(self.output_dir, folder_name)
+
+  def benchmark_graph_8_gpu_2_workers_fp16_ring_tweaked(self):
+    """Legacy graph, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
+    self._benchmark_common(eager=False, num_workers=2, all_reduce_alg='ring')
+
+  def benchmark_graph_8_gpu_2_workers_fp16_nccl_tweaked(self):
+    """Legacy graph, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
+    self._benchmark_common(eager=False, num_workers=2, all_reduce_alg='nccl')
+
+  def benchmark_graph_8_gpu_8_workers_fp16_ring_tweaked(self):
+    """Legacy graph, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
+    self._benchmark_common(eager=False, num_workers=8, all_reduce_alg='ring')
+
+  def benchmark_graph_8_gpu_8_workers_fp16_nccl_tweaked(self):
+    """Legacy graph, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
+    self._benchmark_common(eager=False, num_workers=8, all_reduce_alg='nccl')
+
+  def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self):
+    """Eager, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
+    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='ring')
+
+  def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self):
+    """Eager, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
+    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='nccl')
+
+  def benchmark_eager_8_gpu_8_workers_fp16_ring_tweaked(self):
+    """Eager, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
+    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='ring')
+
+  def benchmark_eager_8_gpu_8_workers_fp16_nccl_tweaked(self):
+    """Eager, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
+    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='nccl')
+
+
 class Resnet50MultiWorkerKerasBenchmark(Resnet50KerasBenchmarkBase):
   """Resnet50 distributed benchmark tests with multiple workers."""
@@ -986,8 +1069,8 @@ class Resnet50MultiWorkerKerasBenchmark(Resnet50KerasBenchmarkBase):
     FLAGS.use_tensor_lr = True
     FLAGS.tf_gpu_thread_mode = 'gpu_private'
     FLAGS.model_dir = self._get_model_dir(
-        'benchmark_graph_8_gpu_{}_worker_fp16_{}_tweaked'.format(
-            num_workers, all_reduce_alg))
+        'benchmark_{}_8_gpu_{}_worker_fp16_{}_tweaked'.format(
+            'eager' if eager else 'graph', num_workers, all_reduce_alg))
     FLAGS.batch_size = 256 * num_gpus * num_workers
     FLAGS.all_reduce_alg = all_reduce_alg
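The second hunk aligns the existing multi-worker throughput benchmarks with the naming used by the new accuracy class: the model_dir pattern previously hard-coded 'graph', whereas the directory name now records whether the run was eager or graph. A quick illustration of the corrected pattern (values are examples only):

# Illustration only: the renamed pattern now encodes the execution mode.
pattern = 'benchmark_{}_8_gpu_{}_worker_fp16_{}_tweaked'
eager, num_workers, all_reduce_alg = True, 2, 'ring'
print(pattern.format('eager' if eager else 'graph', num_workers, all_reduce_alg))
# -> benchmark_eager_8_gpu_2_worker_fp16_ring_tweaked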