ModelZoo / ResNet50_tensorflow / Commits / 06f22a59

Commit 06f22a59, authored Oct 28, 2019 by Zongwei Zhou, committed by A. Unique TensorFlower on Oct 28, 2019.
Parent: b62439d7

    Add Resnet50 benchmark suite that reads training data from remote storage

    PiperOrigin-RevId: 277082247

Showing 5 changed files with 64 additions and 14 deletions (+64, -14).
official/benchmark/keras_imagenet_benchmark.py                  +31  -7
official/utils/flags/_performance.py                            +15  -1
official/vision/image_classification/common.py                   +4  -3
official/vision/image_classification/imagenet_preprocessing.py  +10  -1
official/vision/image_classification/resnet_imagenet_main.py     +4  -2
official/benchmark/keras_imagenet_benchmark.py

@@ -201,13 +201,14 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
         flag_methods=flag_methods,
         default_flags=default_flags)
 
-  def _run_and_report_benchmark(self):
+  def _run_and_report_benchmark(self, skip_steps=None):
     start_time_sec = time.time()
     stats = resnet_imagenet_main.run(FLAGS)
     wall_time_sec = time.time() - start_time_sec
     # Number of logged step time entries that are excluded in performance
-    # report. We keep results from last 100 batches in this case.
-    warmup = (FLAGS.train_steps - 100) // FLAGS.log_steps
+    # report. We keep results from last 100 batches, or skip the steps based on
+    # input skip_steps.
+    warmup = (skip_steps or (FLAGS.train_steps - 100)) // FLAGS.log_steps
 
     super(Resnet50KerasBenchmarkBase, self)._report_benchmark(
         stats,

@@ -845,6 +846,29 @@ class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
         output_dir=output_dir,
         default_flags=def_flags)
 
 
+class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
+  """Resnet50 real data (stored in remote storage) benchmark tests."""
+
+  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
+    def_flags = {}
+    def_flags['skip_eval'] = True
+    def_flags['report_accuracy_metrics'] = False
+    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
+    # Defining multiple epochs overrides the train_steps setting in benchmarks.
+    def_flags['train_epochs'] = 2
+    # Cache dataset so performance is stable after the first epoch.
+    def_flags['training_dataset_cache'] = True
+    def_flags['log_steps'] = 100
+
+    super(Resnet50KerasBenchmarkRemoteData, self).__init__(
+        output_dir=output_dir, default_flags=def_flags)
+
+  def _run_and_report_benchmark(self):
+    # skip the first epoch for performance measurement.
+    super(Resnet50KerasBenchmarkRemoteData,
+          self)._run_and_report_benchmark(skip_steps=600)
+
+
 class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
   """Trivial model with real data benchmark tests."""
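For reference, here is a standalone sketch of the warmup arithmetic in _run_and_report_benchmark. The warmup_entries helper and the flag values are illustrative, not part of the repo:

def warmup_entries(log_steps, train_steps, skip_steps=None):
  # Number of logged step-time entries excluded from the performance report,
  # mirroring: (skip_steps or (FLAGS.train_steps - 100)) // FLAGS.log_steps
  return (skip_steps or (train_steps - 100)) // log_steps

# Default path: keep only the last 100 batches of an 1100-step run.
assert warmup_entries(log_steps=100, train_steps=1100) == 10

# Remote-data path: skip_steps=600 drops the entries logged while the first
# epoch pays the one-time cost of filling the dataset cache.
assert warmup_entries(log_steps=100, train_steps=1100, skip_steps=600) == 6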
official/utils/flags/_performance.py

@@ -63,7 +63,8 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
                        dynamic_loss_scale=False, fp16_implementation=False,
                        loss_scale=False,
                        tf_data_experimental_slack=False, enable_xla=False,
-                       force_v2_in_keras_compile=False):
+                       force_v2_in_keras_compile=False,
+                       training_dataset_cache=False):
   """Register flags for specifying performance tuning arguments.
 
   Args:

@@ -92,6 +93,9 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
     force_v2_in_keras_compile: Forces the use of run_distribued path even if not
       using a `strategy`. This is not the same as
       `tf.distribute.OneDeviceStrategy`
+    training_dataset_cache: Whether to cache the training dataset on workers.
+      Typically used to improve training performance when training data is in
+      remote storage and can fit into worker memory.
 
   Returns:
     A list of flags for core.py to marks as key flags.

@@ -262,6 +266,16 @@ def define_performance(num_parallel_calls=False, inter_op=False, intra_op=False,
             "map and batch from tf.data."
         )
     )
 
+  if training_dataset_cache:
+    flags.DEFINE_boolean(
+        name="training_dataset_cache", default=False,
+        help=help_wrap(
+            "Determines whether to cache the training dataset on workers. "
+            "Typically used to improve training performance when training "
+            "data is in remote storage and can fit into worker memory."))
+
   if tf_data_experimental_slack:
     flags.DEFINE_boolean(
         name="tf_data_experimental_slack",
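The flag is only registered when the caller opts in, which keeps unrelated models' flag namespaces clean. A minimal self-contained sketch of that pattern, using absl.flags directly (help_wrap from the repo is replaced by a plain string here):

from absl import flags

def define_performance(training_dataset_cache=False):
  """Registers optional performance flags; returns key-flag names."""
  key_flags = []
  if training_dataset_cache:
    flags.DEFINE_boolean(
        name="training_dataset_cache", default=False,
        help="Cache the training dataset in worker memory.")
    key_flags.append("training_dataset_cache")
  return key_flags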
official/vision/image_classification/common.py

@@ -298,7 +298,8 @@ def define_keras_flags(dynamic_loss_scale=True):
       fp16_implementation=True,
       tf_data_experimental_slack=True,
       enable_xla=True,
-      force_v2_in_keras_compile=True)
+      force_v2_in_keras_compile=True,
+      training_dataset_cache=True)
   flags_core.define_image()
   flags_core.define_benchmark()
   flags_core.define_distribution()

@@ -327,8 +328,8 @@ def define_keras_flags(dynamic_loss_scale=True):
   flags.DEFINE_integer(
       name='train_steps', default=None,
       help='The number of steps to run for training. If it is larger than '
-      '# batches per epoch, then use # batches per epoch. When this flag is '
-      'set, only one epoch is going to run for training.')
+      '# batches per epoch, then use # batches per epoch. This flag will be '
+      'ignored if train_epochs is set to be larger than 1.')
   flags.DEFINE_string(
       name='profile_steps', default=None,
       help='Save profiling data to model dir at given range of steps. The '
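Since define_keras_flags() now passes training_dataset_cache=True, the flag is registered for every Keras image-classification binary. A standalone sketch of how a parsed command line surfaces it (the flag is registered directly here rather than through flags_core, and the argv is illustrative):

from absl import flags

flags.DEFINE_boolean(name='training_dataset_cache', default=False,
                     help='Cache the training dataset on workers.')

FLAGS = flags.FLAGS
# Parse an example argv, as absl.app.run() would on startup.
FLAGS(['resnet_imagenet_main.py', '--training_dataset_cache=true'])
assert FLAGS.training_dataset_cache is True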
official/vision/image_classification/imagenet_preprocessing.py

@@ -255,7 +255,8 @@ def input_fn(is_training,
              parse_record_fn=parse_record,
              input_context=None,
              drop_remainder=False,
-             tf_data_experimental_slack=False):
+             tf_data_experimental_slack=False,
+             training_dataset_cache=False):
   """Input function which provides batches for train or eval.
 
   Args:

@@ -272,6 +273,9 @@ def input_fn(is_training,
       batches. If True, the batch dimension will be static.
     tf_data_experimental_slack: Whether to enable tf.data's
       `experimental_slack` option.
+    training_dataset_cache: Whether to cache the training dataset on workers.
+      Typically used to improve training performance when training data is in
+      remote storage and can fit into worker memory.
 
   Returns:
     A dataset that can be used for iteration.

@@ -299,6 +303,11 @@ def input_fn(is_training,
         cycle_length=10,
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
 
+  if is_training and training_dataset_cache:
+    # Improve training performance when training data is in remote storage and
+    # can fit into worker memory.
+    dataset = dataset.cache()
+
   return process_record_dataset(
       dataset=dataset,
       is_training=is_training,
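The dataset.cache() call is what the new benchmark leans on: the first epoch streams records from remote storage and fills an in-memory cache, and every later epoch replays from memory. A standalone sketch of that behavior; expensive_read and the element counts are illustrative stand-ins:

import tensorflow as tf

reads = []

def expensive_read(x):
  reads.append(int(x))  # stands in for a slow read from remote storage
  return x

ds = tf.data.Dataset.range(4)
ds = ds.map(lambda x: tf.py_function(expensive_read, [x], tf.int64))
ds = ds.cache()  # same call the diff adds for is_training runs

for _ in range(2):  # two "epochs"
  for _ in ds:
    pass

# The map ran only in the first epoch; the second was served from the cache.
assert len(reads) == 4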
official/vision/image_classification/resnet_imagenet_main.py

@@ -128,6 +128,7 @@ def run(flags_obj):
       dtype=dtype,
       drop_remainder=drop_remainder,
       tf_data_experimental_slack=flags_obj.tf_data_experimental_slack,
+      training_dataset_cache=flags_obj.training_dataset_cache,
   )
 
   eval_input_dataset = None

@@ -198,7 +199,8 @@ def run(flags_obj):
                      imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
   train_epochs = flags_obj.train_epochs
 
-  if flags_obj.train_steps:
+  # if multiple epochs, ignore the train_steps flag.
+  if train_epochs <= 1 and flags_obj.train_steps:
     train_steps = min(flags_obj.train_steps, train_steps)
     train_epochs = 1

@@ -254,7 +256,7 @@ def run(flags_obj):
 def define_imagenet_keras_flags():
   common.define_keras_flags()
-  flags_core.set_defaults(train_epochs=90)
+  flags_core.set_defaults()
   flags.adopt_module_key_flags(common)
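A worked example of the new guard: the remote-data benchmark sets train_epochs=2, so any train_steps flag no longer truncates the run, while single-epoch benchmarks behave as before. The resolve_schedule helper is hypothetical, and the 5004 steps-per-epoch figure is illustrative (ImageNet's 1,281,167 training images at batch size 256):

def resolve_schedule(train_epochs, train_steps_flag, steps_per_epoch):
  """Mirrors the updated run() logic: train_steps only caps 1-epoch runs."""
  train_steps = steps_per_epoch
  # if multiple epochs, ignore the train_steps flag.
  if train_epochs <= 1 and train_steps_flag:
    train_steps = min(train_steps_flag, train_steps)
    train_epochs = 1
  return train_epochs, train_steps

# Single-epoch benchmark: the flag caps the step count as before.
assert resolve_schedule(1, 110, 5004) == (1, 110)
# Remote-data benchmark (train_epochs=2): the flag is ignored and both
# epochs run the full 5004 steps.
assert resolve_schedule(2, 110, 5004) == (2, 5004)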