Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
f16a7b5b
Unverified
Commit
f16a7b5b
authored
May 04, 2021
by
vedanshu
Committed by
GitHub
May 04, 2021
Browse files
Merge pull request
#1
from tensorflow/master
new pull
parents
8e9296ff
8f58f396
Changes
298
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
4090 deletions
+0
-4090
official/benchmark/models/cifar_preprocessing.py
official/benchmark/models/cifar_preprocessing.py
+0
-159
official/benchmark/models/resnet_cifar_main.py
official/benchmark/models/resnet_cifar_main.py
+0
-284
official/benchmark/models/resnet_cifar_model.py
official/benchmark/models/resnet_cifar_model.py
+0
-262
official/benchmark/models/resnet_cifar_test.py
official/benchmark/models/resnet_cifar_test.py
+0
-180
official/benchmark/models/resnet_imagenet_main.py
official/benchmark/models/resnet_imagenet_main.py
+0
-301
official/benchmark/models/resnet_imagenet_test.py
official/benchmark/models/resnet_imagenet_test.py
+0
-249
official/benchmark/models/resnet_imagenet_test_tpu.py
official/benchmark/models/resnet_imagenet_test_tpu.py
+0
-105
official/benchmark/models/shakespeare/README.md
official/benchmark/models/shakespeare/README.md
+0
-31
official/benchmark/models/shakespeare/__init__.py
official/benchmark/models/shakespeare/__init__.py
+0
-1
official/benchmark/models/shakespeare/shakespeare_main.py
official/benchmark/models/shakespeare/shakespeare_main.py
+0
-313
official/benchmark/models/synthetic_util.py
official/benchmark/models/synthetic_util.py
+0
-129
official/benchmark/ncf_keras_benchmark.py
official/benchmark/ncf_keras_benchmark.py
+0
-488
official/benchmark/nhnet_benchmark.py
official/benchmark/nhnet_benchmark.py
+0
-148
official/benchmark/owner_utils.py
official/benchmark/owner_utils.py
+0
-67
official/benchmark/owner_utils_test.py
official/benchmark/owner_utils_test.py
+0
-104
official/benchmark/perfzero_benchmark.py
official/benchmark/perfzero_benchmark.py
+0
-100
official/benchmark/resnet_ctl_imagenet_benchmark.py
official/benchmark/resnet_ctl_imagenet_benchmark.py
+0
-452
official/benchmark/retinanet_benchmark.py
official/benchmark/retinanet_benchmark.py
+0
-293
official/benchmark/shakespeare_benchmark.py
official/benchmark/shakespeare_benchmark.py
+0
-355
official/benchmark/tfhub_memory_usage_benchmark.py
official/benchmark/tfhub_memory_usage_benchmark.py
+0
-69
No files found.
Too many changes to show.
To preserve performance only
298 of 298+
files are displayed.
Plain diff
Email patch
official/benchmark/models/cifar_preprocessing.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to Cifar-10 dataset."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
from
absl
import
logging
import
tensorflow
as
tf
from
official.vision.image_classification.resnet
import
imagenet_preprocessing
# CIFAR-10 geometry: 32x32 RGB images.
HEIGHT = 32
WIDTH = 32
NUM_CHANNELS = 3
_DEFAULT_IMAGE_BYTES = HEIGHT * WIDTH * NUM_CHANNELS
# The record is the image plus a one-byte label.
_RECORD_BYTES = _DEFAULT_IMAGE_BYTES + 1
# TODO(tobyboyd): Change to best practice 45K(train)/5K(val)/10K(test) splits.
NUM_IMAGES = {
    'train': 50000,
    'validation': 10000,
}
# The training split ships as 5 binary files (data_batch_1..5.bin).
_NUM_DATA_FILES = 5
NUM_CLASSES = 10
def parse_record(raw_record, is_training, dtype):
  """Parses a record containing a training example of an image.

  The input record is parsed into a label and image, and the image is passed
  through preprocessing steps (cropping, flipping, and so on).

  Args:
    raw_record: scalar Tensor tf.string containing a serialized Example
      protocol buffer.
    is_training: A boolean denoting whether the input is for training.
    dtype: Data type to use for input images.

  Returns:
    Tuple of (processed image tensor, int32 label tensor).
  """
  # Decode the raw bytes into a flat uint8 vector of _RECORD_BYTES entries.
  record = tf.io.decode_raw(raw_record, tf.uint8)

  # Byte 0 is the class label; promote it from uint8 to int32.
  label = tf.cast(record[0], tf.int32)

  # Bytes 1.._RECORD_BYTES hold the pixels in [depth, height, width] order.
  pixels = tf.reshape(record[1:_RECORD_BYTES], [NUM_CHANNELS, HEIGHT, WIDTH])

  # Reorder to [height, width, depth] and move to float32 for preprocessing.
  image = tf.cast(tf.transpose(a=pixels, perm=[1, 2, 0]), tf.float32)
  image = tf.cast(preprocess_image(image, is_training), dtype)
  return image, label
def preprocess_image(image, is_training):
  """Preprocess a single image of layout [height, width, depth]."""
  if is_training:
    # Pad out to (HEIGHT+8, WIDTH+8), take a random HEIGHT x WIDTH crop, then
    # a random horizontal flip — standard CIFAR-10 training augmentation.
    padded = tf.image.resize_with_crop_or_pad(image, HEIGHT + 8, WIDTH + 8)
    cropped = tf.image.random_crop(padded, [HEIGHT, WIDTH, NUM_CHANNELS])
    image = tf.image.random_flip_left_right(cropped)

  # Subtract off the mean and divide by the variance of the pixels
  # (applied for both training and evaluation).
  return tf.image.per_image_standardization(image)
def get_filenames(is_training, data_dir):
  """Returns a list of filenames."""
  assert tf.io.gfile.exists(data_dir), (
      'Run cifar10_download_and_extract.py first to download and extract the '
      'CIFAR-10 data.')

  # Evaluation uses the single held-out test batch.
  if not is_training:
    return [os.path.join(data_dir, 'test_batch.bin')]

  # Training uses data_batch_1.bin .. data_batch_5.bin.
  return [
      os.path.join(data_dir, 'data_batch_%d.bin' % i)
      for i in range(1, _NUM_DATA_FILES + 1)
  ]
def input_fn(is_training,
             data_dir,
             batch_size,
             dtype=tf.float32,
             datasets_num_private_threads=None,
             parse_record_fn=parse_record,
             input_context=None,
             drop_remainder=False):
  """Input function which provides batches for train or eval.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    dtype: Data type to use for images/features
    datasets_num_private_threads: Number of private threads for tf.data.
    parse_record_fn: Function to use for parsing the records.
    input_context: A `tf.distribute.InputContext` object passed in by
      `tf.distribute.Strategy`.
    drop_remainder: A boolean indicates whether to drop the remainder of the
      batches. If True, the batch dimension will be static.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  # Each CIFAR-10 record is a fixed-size blob: 1 label byte + 3072 pixel bytes.
  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

  if input_context:
    # Under tf.distribute, shard the file-level dataset so that each input
    # pipeline reads a disjoint subset of the records.
    logging.info(
        'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
        input_context.input_pipeline_id, input_context.num_input_pipelines)
    dataset = dataset.shard(input_context.num_input_pipelines,
                            input_context.input_pipeline_id)

  # Shared shuffle/parse/batch/prefetch machinery lives in
  # imagenet_preprocessing; shuffle buffer covers the full training split.
  return imagenet_preprocessing.process_record_dataset(
      dataset=dataset,
      is_training=is_training,
      batch_size=batch_size,
      shuffle_buffer=NUM_IMAGES['train'],
      parse_record_fn=parse_record_fn,
      dtype=dtype,
      datasets_num_private_threads=datasets_num_private_threads,
      drop_remainder=drop_remainder)
official/benchmark/models/resnet_cifar_main.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the Cifar-10 dataset."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
absl
import
app
from
absl
import
flags
from
absl
import
logging
import
numpy
as
np
import
tensorflow
as
tf
from
official.benchmark.models
import
cifar_preprocessing
from
official.benchmark.models
import
resnet_cifar_model
from
official.benchmark.models
import
synthetic_util
from
official.utils.flags
import
core
as
flags_core
from
official.utils.misc
import
distribution_utils
from
official.utils.misc
import
keras_utils
from
official.vision.image_classification.resnet
import
common
# Piecewise learning-rate decay schedule.
LR_SCHEDULE = [  # (multiplier, epoch to start) tuples
    (0.1, 91), (0.01, 136), (0.001, 182)
]
def learning_rate_schedule(current_epoch, current_batch, batches_per_epoch,
                           batch_size):
  """Handles linear scaling rule and LR decay.

  Scale learning rate at epoch boundaries provided in LR_SCHEDULE by the
  provided scaling factor.

  Args:
    current_epoch: integer, current epoch indexed from 0.
    current_batch: integer, current batch in the current epoch, indexed from 0.
    batches_per_epoch: integer, number of steps in an epoch.
    batch_size: integer, total batch sized.

  Returns:
    Adjusted learning rate.
  """
  del current_batch, batches_per_epoch  # not used
  # Linear scaling rule: scale the base LR by (batch_size / 128).
  base_lr = common.BASE_LEARNING_RATE * batch_size / 128
  # Apply the multiplier of the latest schedule boundary already reached.
  lr = base_lr
  for multiplier, boundary_epoch in LR_SCHEDULE:
    if current_epoch < boundary_epoch:
      break
    lr = base_lr * multiplier
  return lr
class LearningRateBatchScheduler(tf.keras.callbacks.Callback):
  """Callback to update learning rate on every batch (not epoch boundaries).

  N.B. Only support Keras optimizers, not TF optimizers.

  Attributes:
    schedule: a function that takes an epoch index and a batch index as input
      (both integer, indexed from 0) and returns a new learning rate as
      output (float).
  """

  def __init__(self, schedule, batch_size, steps_per_epoch):
    super(LearningRateBatchScheduler, self).__init__()
    self.schedule = schedule
    self.steps_per_epoch = steps_per_epoch
    self.batch_size = batch_size
    # Start at -1 so the first on_epoch_begin advances to epoch 0 and the
    # first on_batch_begin always assigns an initial learning rate.
    self.epochs = -1
    self.prev_lr = -1

  def on_epoch_begin(self, epoch, logs=None):
    # Fail fast on optimizers that do not expose a mutable learning rate.
    if not hasattr(self.model.optimizer, 'learning_rate'):
      raise ValueError('Optimizer must have a "learning_rate" attribute.')
    self.epochs += 1

  def on_batch_begin(self, batch, logs=None):
    """Executes before step begins."""
    lr = self.schedule(self.epochs, batch, self.steps_per_epoch,
                       self.batch_size)
    if not isinstance(lr, (float, np.float32, np.float64)):
      raise ValueError('The output of the "schedule" function should be float.')
    # Only touch the optimizer (and log) when the rate actually changes.
    if lr == self.prev_lr:
      return
    self.model.optimizer.learning_rate = lr  # lr should be a float here
    self.prev_lr = lr
    logging.debug(
        'Epoch %05d Batch %05d: LearningRateBatchScheduler '
        'change learning rate to %s.', self.epochs, batch, lr)
def run(flags_obj):
  """Run ResNet Cifar-10 training and eval loop using native Keras APIs.

  Args:
    flags_obj: An object containing parsed flag values.

  Raises:
    ValueError: If fp16 is passed as it is not currently supported.

  Returns:
    Dictionary of training and eval stats.
  """
  keras_utils.set_session_config(enable_xla=flags_obj.enable_xla)

  # Execute flag override logic for better model performance.
  if flags_obj.tf_gpu_thread_mode:
    keras_utils.set_gpu_thread_mode_and_count(
        per_gpu_thread_count=flags_obj.per_gpu_thread_count,
        gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
        num_gpus=flags_obj.num_gpus,
        datasets_num_private_threads=flags_obj.datasets_num_private_threads)
  common.set_cudnn_batchnorm_mode()

  dtype = flags_core.get_tf_dtype(flags_obj)
  if dtype == 'fp16':
    raise ValueError('dtype fp16 is not supported in Keras. Use the default '
                     'value(fp32).')

  # Default the data format to whatever the hardware favors: channels_first
  # on GPU, channels_last otherwise.
  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first'
                   if tf.config.list_physical_devices('GPU') else
                   'channels_last')
  tf.keras.backend.set_image_data_format(data_format)

  strategy = distribution_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus,
      all_reduce_alg=flags_obj.all_reduce_alg,
      num_packs=flags_obj.num_packs)

  if strategy:
    # flags_obj.enable_get_next_as_optional controls whether enabling
    # get_next_as_optional behavior in DistributedIterator. If true, last
    # partial batch can be supported.
    strategy.extended.experimental_enable_get_next_as_optional = (
        flags_obj.enable_get_next_as_optional)

  strategy_scope = distribution_utils.get_strategy_scope(strategy)

  # Pick the input pipeline: synthetic data for benchmarking, otherwise the
  # real CIFAR-10 binary-record pipeline.
  if flags_obj.use_synthetic_data:
    synthetic_util.set_up_synthetic_data()
    input_fn = common.get_synth_input_fn(
        height=cifar_preprocessing.HEIGHT,
        width=cifar_preprocessing.WIDTH,
        num_channels=cifar_preprocessing.NUM_CHANNELS,
        num_classes=cifar_preprocessing.NUM_CLASSES,
        dtype=flags_core.get_tf_dtype(flags_obj),
        drop_remainder=True)
  else:
    synthetic_util.undo_set_up_synthetic_data()
    input_fn = cifar_preprocessing.input_fn

  train_input_dataset = input_fn(
      is_training=True,
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      parse_record_fn=cifar_preprocessing.parse_record,
      datasets_num_private_threads=flags_obj.datasets_num_private_threads,
      dtype=dtype,
      # Setting drop_remainder to avoid the partial batch logic in
      # normalization layer, which triggers tf.where and leads to extra
      # memory copy of input sizes between host and GPU.
      drop_remainder=(not flags_obj.enable_get_next_as_optional))

  eval_input_dataset = None
  if not flags_obj.skip_eval:
    eval_input_dataset = input_fn(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        parse_record_fn=cifar_preprocessing.parse_record)

  steps_per_epoch = (
      cifar_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
  # Either a constant LR (overridden per batch by LearningRateBatchScheduler
  # below) or a tensor-based piecewise-constant decay built from LR_SCHEDULE.
  lr_schedule = 0.1
  if flags_obj.use_tensor_lr:
    initial_learning_rate = common.BASE_LEARNING_RATE * flags_obj.batch_size / 128
    lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        boundaries=list(p[1] * steps_per_epoch for p in LR_SCHEDULE),
        values=[initial_learning_rate] +
        list(p[0] * initial_learning_rate for p in LR_SCHEDULE))

  with strategy_scope:
    optimizer = common.get_optimizer(lr_schedule)
    model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=(['sparse_categorical_accuracy']
                 if flags_obj.report_accuracy_metrics else None),
        run_eagerly=flags_obj.run_eagerly)

  train_epochs = flags_obj.train_epochs

  callbacks = common.get_callbacks()

  if not flags_obj.use_tensor_lr:
    lr_callback = LearningRateBatchScheduler(
        schedule=learning_rate_schedule,
        batch_size=flags_obj.batch_size,
        steps_per_epoch=steps_per_epoch)
    callbacks.append(lr_callback)

  # If multiple epochs, ignore the train_steps flag.
  if train_epochs <= 1 and flags_obj.train_steps:
    steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
    train_epochs = 1

  num_eval_steps = (
      cifar_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size)

  validation_data = eval_input_dataset
  if flags_obj.skip_eval:
    if flags_obj.set_learning_phase_to_train:
      # TODO(haoyuzhang): Understand slowdown of setting learning phase when
      # not using distribution strategy.
      tf.keras.backend.set_learning_phase(1)
    num_eval_steps = None
    validation_data = None

  if not strategy and flags_obj.explicit_gpu_placement:
    # TODO(b/135607227): Add device scope automatically in Keras training loop
    # when not using distribution strategy.
    no_dist_strat_device = tf.device('/device:GPU:0')
    no_dist_strat_device.__enter__()

  history = model.fit(train_input_dataset,
                      epochs=train_epochs,
                      steps_per_epoch=steps_per_epoch,
                      callbacks=callbacks,
                      validation_steps=num_eval_steps,
                      validation_data=validation_data,
                      validation_freq=flags_obj.epochs_between_evals,
                      verbose=2)
  eval_output = None
  if not flags_obj.skip_eval:
    eval_output = model.evaluate(eval_input_dataset,
                                 steps=num_eval_steps,
                                 verbose=2)

  if not strategy and flags_obj.explicit_gpu_placement:
    # NOTE(review): __exit__ is invoked with no arguments here, unlike the
    # standard (exc_type, exc_value, traceback) protocol — confirm tf.device's
    # context manager tolerates this.
    no_dist_strat_device.__exit__()

  stats = common.build_stats(history, eval_output, callbacks)
  return stats
def define_cifar_flags():
  """Registers the Keras/CIFAR command-line flags and their defaults.

  Side effect: mutates the global absl flag registry; call exactly once
  before app.run().
  """
  common.define_keras_flags(dynamic_loss_scale=False)

  flags_core.set_defaults(data_dir='/tmp/cifar10_data/cifar-10-batches-bin',
                          model_dir='/tmp/cifar10_model',
                          epochs_between_evals=10,
                          batch_size=128)
def main(_):
  """absl app entry point: trains/evaluates using the globally parsed flags."""
  stats = run(flags.FLAGS)
  return stats
if __name__ == '__main__':
  # Script entry: enable INFO logging, register flags, then hand control to
  # absl so flags are parsed before main() runs.
  logging.set_verbosity(logging.INFO)
  define_cifar_flags()
  app.run(main)
official/benchmark/models/resnet_cifar_model.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""ResNet56 model for Keras adapted from tf.keras.applications.ResNet50.
# Reference:
- [Deep Residual Learning for Image Recognition](
https://arxiv.org/abs/1512.03385)
Adapted from code contributed by BigMoyan.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
functools
import
tensorflow
as
tf
from
tensorflow.python.keras
import
backend
from
tensorflow.python.keras
import
initializers
from
tensorflow.python.keras
import
layers
from
tensorflow.python.keras
import
regularizers
# Batch-normalization momentum and epsilon shared by every BN layer below.
BATCH_NORM_DECAY = 0.997
BATCH_NORM_EPSILON = 1e-5
# L2 regularization strength applied to conv/dense kernels (and dense bias).
L2_WEIGHT_DECAY = 2e-4
def identity_building_block(input_tensor,
                            kernel_size,
                            filters,
                            stage,
                            block,
                            training=None):
  """The identity block is the block that has no conv layer at shortcut.

  Arguments:
    input_tensor: input tensor
    kernel_size: default 3, the kernel size of middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    block: current block label, used for generating layer names
    training: Only used if training keras model with Estimator. In other
      scenarios it is handled automatically.

  Returns:
    Output tensor for the block.
  """
  filters1, filters2 = filters
  bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

  # Main path: conv -> BN -> relu -> conv -> BN.
  out = layers.Conv2D(
      filters1,
      kernel_size,
      padding='same',
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      name=conv_name_base + '2a')(input_tensor)
  out = layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '2a')(out, training=training)
  out = layers.Activation('relu')(out)

  out = layers.Conv2D(
      filters2,
      kernel_size,
      padding='same',
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      name=conv_name_base + '2b')(out)
  out = layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '2b')(out, training=training)

  # Identity shortcut: add the untouched block input, then the final relu.
  out = layers.add([out, input_tensor])
  return layers.Activation('relu')(out)
def conv_building_block(input_tensor,
                        kernel_size,
                        filters,
                        stage,
                        block,
                        strides=(2, 2),
                        training=None):
  """A block that has a conv layer at shortcut.

  Arguments:
    input_tensor: input tensor
    kernel_size: default 3, the kernel size of middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    block: current block label, used for generating layer names
    strides: Strides for the first conv layer in the block.
    training: Only used if training keras model with Estimator. In other
      scenarios it is handled automatically.

  Returns:
    Output tensor for the block.

  Note that from stage 3,
  the first conv layer at main path is with strides=(2, 2)
  And the shortcut should have strides=(2, 2) as well
  """
  filters1, filters2 = filters
  # Use the module-level `backend` alias for consistency with
  # identity_building_block (tf.keras.backend is the same module).
  if backend.image_data_format() == 'channels_last':
    bn_axis = 3
  else:
    bn_axis = 1
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'

  # Main path: strided conv -> BN -> relu -> conv -> BN.
  x = layers.Conv2D(
      filters1,
      kernel_size,
      strides=strides,
      padding='same',
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      name=conv_name_base + '2a')(input_tensor)
  x = layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '2a')(x, training=training)
  x = layers.Activation('relu')(x)

  x = layers.Conv2D(
      filters2,
      kernel_size,
      padding='same',
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      name=conv_name_base + '2b')(x)
  x = layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '2b')(x, training=training)

  # Projection shortcut: a strided 1x1 conv so the shortcut matches the main
  # path's spatial size and channel count before the add.
  shortcut = layers.Conv2D(
      filters2, (1, 1),
      strides=strides,
      use_bias=False,
      kernel_initializer='he_normal',
      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      name=conv_name_base + '1')(input_tensor)
  shortcut = layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name=bn_name_base + '1')(shortcut, training=training)

  x = layers.add([x, shortcut])
  x = layers.Activation('relu')(x)
  return x
def resnet_block(input_tensor,
                 size,
                 kernel_size,
                 filters,
                 stage,
                 conv_strides=(2, 2),
                 training=None):
  """A block which applies conv followed by multiple identity blocks.

  Arguments:
    input_tensor: input tensor
    size: integer, number of constituent conv/identity building blocks.
      A conv block is applied once, followed by (size - 1) identity blocks.
    kernel_size: default 3, the kernel size of middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    conv_strides: Strides for the first conv layer in the block.
    training: Only used if training keras model with Estimator. In other
      scenarios it is handled automatically.

  Returns:
    Output tensor after applying conv and identity blocks.
  """
  # The first sub-block ('block_0') changes resolution/width via a conv
  # shortcut; the rest preserve shape via identity shortcuts.
  output = conv_building_block(
      input_tensor,
      kernel_size,
      filters,
      stage=stage,
      strides=conv_strides,
      block='block_0',
      training=training)
  for block_index in range(1, size):
    output = identity_building_block(
        output,
        kernel_size,
        filters,
        stage=stage,
        block='block_%d' % block_index,
        training=training)
  return output
def resnet(num_blocks, classes=10, training=None):
  """Instantiates the ResNet architecture.

  Arguments:
    num_blocks: integer, the number of conv/identity blocks in each block.
      The ResNet contains 3 blocks with each block containing one conv block
      followed by (layers_per_block - 1) number of idenity blocks. Each
      conv/idenity block has 2 convolutional layers. With the input
      convolutional layer and the pooling layer towards the end, this brings
      the total size of the network to (6*num_blocks + 2)
    classes: optional number of classes to classify images into
    training: Only used if training keras model with Estimator. In other
      scenarios it is handled automatically.

  Returns:
    A Keras model instance.
  """
  # CIFAR input is always 32x32x3, declared channels-last at the input layer.
  input_shape = (32, 32, 3)
  img_input = layers.Input(shape=input_shape)

  if backend.image_data_format() == 'channels_first':
    # Transpose NHWC input to NCHW inside the graph when the backend prefers
    # channels_first.
    x = layers.Lambda(
        lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
        name='transpose')(img_input)
    bn_axis = 1
  else:  # channels_last
    x = img_input
    bn_axis = 3

  # Stem: pad + 3x3 conv (16 filters, stride 1) + BN + relu.
  x = layers.ZeroPadding2D(padding=(1, 1), name='conv1_pad')(x)
  x = layers.Conv2D(16, (3, 3),
                    strides=(1, 1),
                    padding='valid',
                    use_bias=False,
                    kernel_initializer='he_normal',
                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                    name='conv1')(x)
  x = layers.BatchNormalization(
      axis=bn_axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      name='bn_conv1',)(x, training=training)
  x = layers.Activation('relu')(x)

  # Three stages: 16 -> 32 -> 64 filters; stages 3 and 4 downsample by 2.
  x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[16, 16],
                   stage=2, conv_strides=(1, 1), training=training)

  x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[32, 32],
                   stage=3, conv_strides=(2, 2), training=training)

  x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[64, 64],
                   stage=4, conv_strides=(2, 2), training=training)

  # Global average pool over the spatial axes (position depends on layout).
  rm_axes = [1, 2] if backend.image_data_format() == 'channels_last' else [2, 3]
  x = layers.Lambda(lambda x: backend.mean(x, rm_axes), name='reduce_mean')(x)
  # Classifier head with softmax; both kernel and bias are L2-regularized.
  x = layers.Dense(
      classes,
      activation='softmax',
      kernel_initializer=initializers.RandomNormal(stddev=0.01),
      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
      name='fc10')(x)

  inputs = img_input
  # Create model.
  # NOTE(review): the model name is fixed to 'resnet56' regardless of
  # num_blocks — presumably cosmetic; confirm nothing keys off the name.
  model = tf.keras.models.Model(inputs, x, name='resnet56')

  return model
# Named variants following the (6 * num_blocks + 2) layer-count formula:
# 3 -> ResNet-20, 5 -> ResNet-32, 9 -> ResNet-56.
resnet20 = functools.partial(resnet, num_blocks=3)
resnet32 = functools.partial(resnet, num_blocks=5)
resnet56 = functools.partial(resnet, num_blocks=9)
# NOTE(review): 'resnet10' with num_blocks=110 does not fit the naming
# formula (6*110+2 = 662 layers); it looks like a transposed 'resnet110'
# (which would be num_blocks=18) — confirm against callers before renaming.
resnet10 = functools.partial(resnet, num_blocks=110)
official/benchmark/models/resnet_cifar_test.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with Cifar data."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tempfile
import
tensorflow
as
tf
from
tensorflow.python.eager
import
context
from
tensorflow.python.platform
import
googletest
from
official.benchmark.models
import
cifar_preprocessing
from
official.benchmark.models
import
resnet_cifar_main
from
official.utils.testing
import
integration
class
KerasCifarTest
(
googletest
.
TestCase
):
"""Unit tests for Keras ResNet with Cifar."""
# Flags shared by every test case: tiny batch, a single training step, and
# synthetic input so no dataset download is needed.
_extra_flags = [
    "-batch_size", "4",
    "-train_steps", "1",
    "-use_synthetic_data", "true"
]
# Lazily-created scratch directory; see get_temp_dir().
_tempdir = None
def get_temp_dir(self):
  """Returns (creating on first use) a per-test scratch directory."""
  if not self._tempdir:
    # Create under the googletest-managed temp root so it is cleaned up
    # with the test environment.
    self._tempdir = tempfile.mkdtemp(dir=googletest.GetTempDir())
  return self._tempdir
@
classmethod
def
setUpClass
(
cls
):
# pylint: disable=invalid-name
super
(
KerasCifarTest
,
cls
).
setUpClass
()
resnet_cifar_main
.
define_cifar_flags
()
def
setUp
(
self
):
super
(
KerasCifarTest
,
self
).
setUp
()
cifar_preprocessing
.
NUM_IMAGES
[
"validation"
]
=
4
def
tearDown
(
self
):
super
(
KerasCifarTest
,
self
).
tearDown
()
tf
.
io
.
gfile
.
rmtree
(
self
.
get_temp_dir
())
def
test_end_to_end_no_dist_strat
(
self
):
"""Test Keras model with 1 GPU, no distribution strategy."""
extra_flags
=
[
"-distribution_strategy"
,
"off"
,
"-model_dir"
,
"keras_cifar_no_dist_strat"
,
"-data_format"
,
"channels_last"
,
]
extra_flags
=
extra_flags
+
self
.
_extra_flags
integration
.
run_synthetic
(
main
=
resnet_cifar_main
.
run
,
tmp_root
=
self
.
get_temp_dir
(),
extra_flags
=
extra_flags
)
def
test_end_to_end_graph_no_dist_strat
(
self
):
"""Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
extra_flags
=
[
"-enable_eager"
,
"false"
,
"-distribution_strategy"
,
"off"
,
"-model_dir"
,
"keras_cifar_graph_no_dist_strat"
,
"-data_format"
,
"channels_last"
,
]
extra_flags
=
extra_flags
+
self
.
_extra_flags
integration
.
run_synthetic
(
main
=
resnet_cifar_main
.
run
,
tmp_root
=
self
.
get_temp_dir
(),
extra_flags
=
extra_flags
)
def
test_end_to_end_1_gpu
(
self
):
"""Test Keras model with 1 GPU."""
if
context
.
num_gpus
()
<
1
:
self
.
skipTest
(
"{} GPUs are not available for this test. {} GPUs are available"
.
format
(
1
,
context
.
num_gpus
()))
extra_flags
=
[
"-num_gpus"
,
"1"
,
"-distribution_strategy"
,
"mirrored"
,
"-model_dir"
,
"keras_cifar_1_gpu"
,
"-data_format"
,
"channels_last"
,
]
extra_flags
=
extra_flags
+
self
.
_extra_flags
integration
.
run_synthetic
(
main
=
resnet_cifar_main
.
run
,
tmp_root
=
self
.
get_temp_dir
(),
extra_flags
=
extra_flags
)
def
test_end_to_end_graph_1_gpu
(
self
):
"""Test Keras model in legacy graph mode with 1 GPU."""
if
context
.
num_gpus
()
<
1
:
self
.
skipTest
(
"{} GPUs are not available for this test. {} GPUs are available"
.
format
(
1
,
context
.
num_gpus
()))
extra_flags
=
[
"-num_gpus"
,
"1"
,
"-noenable_eager"
,
"-distribution_strategy"
,
"mirrored"
,
"-model_dir"
,
"keras_cifar_graph_1_gpu"
,
"-data_format"
,
"channels_last"
,
]
extra_flags
=
extra_flags
+
self
.
_extra_flags
integration
.
run_synthetic
(
main
=
resnet_cifar_main
.
run
,
tmp_root
=
self
.
get_temp_dir
(),
extra_flags
=
extra_flags
)
def
test_end_to_end_2_gpu
(
self
):
"""Test Keras model with 2 GPUs."""
if
context
.
num_gpus
()
<
2
:
self
.
skipTest
(
"{} GPUs are not available for this test. {} GPUs are available"
.
format
(
2
,
context
.
num_gpus
()))
extra_flags
=
[
"-num_gpus"
,
"2"
,
"-distribution_strategy"
,
"mirrored"
,
"-model_dir"
,
"keras_cifar_2_gpu"
,
]
extra_flags
=
extra_flags
+
self
.
_extra_flags
integration
.
run_synthetic
(
main
=
resnet_cifar_main
.
run
,
tmp_root
=
self
.
get_temp_dir
(),
extra_flags
=
extra_flags
)
def
test_end_to_end_graph_2_gpu
(
self
):
"""Test Keras model in legacy graph mode with 2 GPUs."""
if
context
.
num_gpus
()
<
2
:
self
.
skipTest
(
"{} GPUs are not available for this test. {} GPUs are available"
.
format
(
2
,
context
.
num_gpus
()))
extra_flags
=
[
"-num_gpus"
,
"2"
,
"-enable_eager"
,
"false"
,
"-distribution_strategy"
,
"mirrored"
,
"-model_dir"
,
"keras_cifar_graph_2_gpu"
,
]
extra_flags
=
extra_flags
+
self
.
_extra_flags
integration
.
run_synthetic
(
main
=
resnet_cifar_main
.
run
,
tmp_root
=
self
.
get_temp_dir
(),
extra_flags
=
extra_flags
)
if __name__ == "__main__":
  # Dispatch to the googletest runner when executed as a script.
  googletest.main()
official/benchmark/models/resnet_imagenet_main.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
from
absl
import
app
from
absl
import
flags
from
absl
import
logging
import
tensorflow
as
tf
import
tensorflow_model_optimization
as
tfmot
from
official.modeling
import
performance
from
official.utils.flags
import
core
as
flags_core
from
official.utils.misc
import
distribution_utils
from
official.utils.misc
import
keras_utils
from
official.utils.misc
import
model_helpers
from
official.vision.image_classification
import
test_utils
from
official.vision.image_classification.resnet
import
common
from
official.vision.image_classification.resnet
import
imagenet_preprocessing
from
official.vision.image_classification.resnet
import
resnet_model
def run(flags_obj):
  """Run ResNet ImageNet training and eval loop using native Keras APIs.

  Args:
    flags_obj: An object containing parsed flag values.

  Raises:
    ValueError: If fp16 is passed as it is not currently supported.
    NotImplementedError: If some features are not currently supported.

  Returns:
    Dictionary of training and eval stats.
  """
  keras_utils.set_session_config(enable_xla=flags_obj.enable_xla)

  # Execute flag override logic for better model performance
  if flags_obj.tf_gpu_thread_mode:
    keras_utils.set_gpu_thread_mode_and_count(
        per_gpu_thread_count=flags_obj.per_gpu_thread_count,
        gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
        num_gpus=flags_obj.num_gpus,
        datasets_num_private_threads=flags_obj.datasets_num_private_threads)
  common.set_cudnn_batchnorm_mode()

  dtype = flags_core.get_tf_dtype(flags_obj)
  performance.set_mixed_precision_policy(
      flags_core.get_tf_dtype(flags_obj),
      flags_core.get_loss_scale(flags_obj, default_for_fp16=128))

  # Default the image layout from hardware: channels-first on GPU,
  # channels-last otherwise.
  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first'
                   if tf.config.list_physical_devices('GPU')
                   else 'channels_last')
  tf.keras.backend.set_image_data_format(data_format)

  # Configures cluster spec for distribution strategy.
  _ = distribution_utils.configure_cluster(flags_obj.worker_hosts,
                                           flags_obj.task_index)

  strategy = distribution_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus,
      all_reduce_alg=flags_obj.all_reduce_alg,
      num_packs=flags_obj.num_packs,
      tpu_address=flags_obj.tpu)

  if strategy:
    # flags_obj.enable_get_next_as_optional controls whether enabling
    # get_next_as_optional behavior in DistributedIterator. If true, last
    # partial batch can be supported.
    strategy.extended.experimental_enable_get_next_as_optional = (
        flags_obj.enable_get_next_as_optional)

  strategy_scope = distribution_utils.get_strategy_scope(strategy)

  # pylint: disable=protected-access
  if flags_obj.use_synthetic_data:
    input_fn = common.get_synth_input_fn(
        height=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
        width=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
        num_channels=imagenet_preprocessing.NUM_CHANNELS,
        num_classes=imagenet_preprocessing.NUM_CLASSES,
        dtype=dtype,
        drop_remainder=True)
  else:
    input_fn = imagenet_preprocessing.input_fn

  # When `enable_xla` is True, we always drop the remainder of the batches
  # in the dataset, as XLA-GPU doesn't support dynamic shapes.
  drop_remainder = flags_obj.enable_xla

  # Current resnet_model.resnet50 input format is always channel-last.
  # We use keras_application mobilenet model which input format is depends on
  # the keras beckend image data format.
  # This use_keras_image_data_format flags indicates whether image preprocessor
  # output format should be same as the keras backend image data format or just
  # channel-last format.
  use_keras_image_data_format = (flags_obj.model == 'mobilenet')
  train_input_dataset = input_fn(
      is_training=True,
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      parse_record_fn=imagenet_preprocessing.get_parse_record_fn(
          use_keras_image_data_format=use_keras_image_data_format),
      datasets_num_private_threads=flags_obj.datasets_num_private_threads,
      dtype=dtype,
      drop_remainder=drop_remainder,
      tf_data_experimental_slack=flags_obj.tf_data_experimental_slack,
      training_dataset_cache=flags_obj.training_dataset_cache,
  )

  eval_input_dataset = None
  if not flags_obj.skip_eval:
    eval_input_dataset = input_fn(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        parse_record_fn=imagenet_preprocessing.get_parse_record_fn(
            use_keras_image_data_format=use_keras_image_data_format),
        dtype=dtype,
        drop_remainder=drop_remainder)

  # Piecewise-constant LR with warmup, derived from common.LR_SCHEDULE:
  # entry 0 supplies the warmup epochs; entries 1.. supply the boundaries,
  # and column 0 of every entry supplies the multipliers.
  lr_schedule = common.PiecewiseConstantDecayWithWarmup(
      batch_size=flags_obj.batch_size,
      epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
      warmup_epochs=common.LR_SCHEDULE[0][1],
      boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
      multipliers=list(p[0] for p in common.LR_SCHEDULE),
      compute_lr_on_cpu=True)
  steps_per_epoch = (
      imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)

  with strategy_scope:
    # NOTE(review): only 'resnet50_default' and 'mobilenet_default' bind
    # `optimizer`; any other value would hit a NameError at compile() below —
    # presumably flag validation elsewhere prevents that. TODO confirm.
    if flags_obj.optimizer == 'resnet50_default':
      optimizer = common.get_optimizer(lr_schedule)
    elif flags_obj.optimizer == 'mobilenet_default':
      initial_learning_rate = \
          flags_obj.initial_learning_rate_per_sample * flags_obj.batch_size
      optimizer = tf.keras.optimizers.SGD(
          learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
              initial_learning_rate,
              decay_steps=steps_per_epoch * flags_obj.num_epochs_per_decay,
              decay_rate=flags_obj.lr_decay_factor,
              staircase=True),
          momentum=0.9)
    if flags_obj.fp16_implementation == 'graph_rewrite':
      # Note: when flags_obj.fp16_implementation == "graph_rewrite", dtype as
      # determined by flags_core.get_tf_dtype(flags_obj) would be 'float32'
      # which will ensure tf.compat.v2.keras.mixed_precision and
      # tf.train.experimental.enable_mixed_precision_graph_rewrite do not
      # double up.
      optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
          optimizer)

    # TODO(hongkuny): Remove trivial model usage and move it to benchmark.
    if flags_obj.use_trivial_model:
      model = test_utils.trivial_model(imagenet_preprocessing.NUM_CLASSES)
    elif flags_obj.model == 'resnet50_v1.5':
      model = resnet_model.resnet50(
          num_classes=imagenet_preprocessing.NUM_CLASSES)
    elif flags_obj.model == 'mobilenet':
      # TODO(kimjaehong): Remove layers attribute when minimum TF version
      # support 2.0 layers by default.
      model = tf.keras.applications.mobilenet.MobileNet(
          weights=None,
          classes=imagenet_preprocessing.NUM_CLASSES,
          layers=tf.keras.layers)
    if flags_obj.pretrained_filepath:
      model.load_weights(flags_obj.pretrained_filepath)

    if flags_obj.pruning_method == 'polynomial_decay':
      if dtype != tf.float32:
        raise NotImplementedError(
            'Pruning is currently only supported on dtype=tf.float32.')
      pruning_params = {
          'pruning_schedule':
              tfmot.sparsity.keras.PolynomialDecay(
                  initial_sparsity=flags_obj.pruning_initial_sparsity,
                  final_sparsity=flags_obj.pruning_final_sparsity,
                  begin_step=flags_obj.pruning_begin_step,
                  end_step=flags_obj.pruning_end_step,
                  frequency=flags_obj.pruning_frequency),
      }
      model = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params)
    elif flags_obj.pruning_method:
      raise NotImplementedError(
          'Only polynomial_decay is currently supported.')

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimizer,
        metrics=(['sparse_categorical_accuracy']
                 if flags_obj.report_accuracy_metrics else None),
        run_eagerly=flags_obj.run_eagerly)

  train_epochs = flags_obj.train_epochs

  callbacks = common.get_callbacks(
      pruning_method=flags_obj.pruning_method,
      enable_checkpoint_and_export=flags_obj.enable_checkpoint_and_export,
      model_dir=flags_obj.model_dir)

  # if mutliple epochs, ignore the train_steps flag.
  if train_epochs <= 1 and flags_obj.train_steps:
    steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
    train_epochs = 1

  num_eval_steps = (
      imagenet_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size)

  validation_data = eval_input_dataset
  if flags_obj.skip_eval:
    # Only build the training graph. This reduces memory usage introduced by
    # control flow ops in layers that have different implementations for
    # training and inference (e.g., batch norm).
    if flags_obj.set_learning_phase_to_train:
      # TODO(haoyuzhang): Understand slowdown of setting learning phase when
      # not using distribution strategy.
      tf.keras.backend.set_learning_phase(1)
    num_eval_steps = None
    validation_data = None

  if not strategy and flags_obj.explicit_gpu_placement:
    # TODO(b/135607227): Add device scope automatically in Keras training loop
    # when not using distribition strategy.
    no_dist_strat_device = tf.device('/device:GPU:0')
    no_dist_strat_device.__enter__()

  history = model.fit(train_input_dataset,
                      epochs=train_epochs,
                      steps_per_epoch=steps_per_epoch,
                      callbacks=callbacks,
                      validation_steps=num_eval_steps,
                      validation_data=validation_data,
                      validation_freq=flags_obj.epochs_between_evals,
                      verbose=2)

  eval_output = None
  if not flags_obj.skip_eval:
    eval_output = model.evaluate(eval_input_dataset,
                                 steps=num_eval_steps,
                                 verbose=2)

  # Strip pruning wrappers before export so the saved model is plain Keras.
  if flags_obj.pruning_method:
    model = tfmot.sparsity.keras.strip_pruning(model)
  if flags_obj.enable_checkpoint_and_export:
    if dtype == tf.bfloat16:
      logging.warning('Keras model.save does not support bfloat16 dtype.')
    else:
      # Keras model.save assumes a float32 input designature.
      export_path = os.path.join(flags_obj.model_dir, 'saved_model')
      model.save(export_path, include_optimizer=False)

  if not strategy and flags_obj.explicit_gpu_placement:
    no_dist_strat_device.__exit__()

  stats = common.build_stats(history, eval_output, callbacks)
  return stats
def define_imagenet_keras_flags():
  """Defines the command-line flags consumed by `run` and registers defaults."""
  common.define_keras_flags(
      model=True,
      optimizer=True,
      pretrained_filepath=True)
  common.define_pruning_flags()
  flags_core.set_defaults()
  # Surface `common`'s key flags on this module so --helpshort lists them.
  flags.adopt_module_key_flags(common)
def main(_):
  """App entry point: cleans the model dir per flags, trains, logs stats."""
  model_helpers.apply_clean(flags.FLAGS)
  stats = run(flags.FLAGS)
  logging.info('Run stats:\n%s', stats)
if __name__ == '__main__':
  # Register flags before absl parses argv in app.run.
  logging.set_verbosity(logging.INFO)
  define_imagenet_keras_flags()
  app.run(main)
official/benchmark/models/resnet_imagenet_test.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with ImageNet data."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
tensorflow.python.eager
import
context
from
official.benchmark.models
import
resnet_imagenet_main
from
official.utils.testing
import
integration
from
official.vision.image_classification.resnet
import
imagenet_preprocessing
@parameterized.parameters(
    "resnet",
    # "resnet_polynomial_decay", b/151854314
    "mobilenet",
    # "mobilenet_polynomial_decay" b/151854314
)
class KerasImagenetTest(tf.test.TestCase):
  """Unit tests for Keras Models with ImageNet."""

  # Flags appended to every parameterized run: tiny synthetic workload.
  _default_flags_dict = [
      "-batch_size", "4",
      "-train_steps", "1",
      "-use_synthetic_data", "true",
      "-data_format", "channels_last",
  ]
  # Per-parameter model/optimizer (and optional pruning) flag sets, keyed by
  # the parameterized `flags_key`.
  _extra_flags_dict = {
      "resnet": [
          "-model", "resnet50_v1.5",
          "-optimizer", "resnet50_default",
      ],
      "resnet_polynomial_decay": [
          "-model", "resnet50_v1.5",
          "-optimizer", "resnet50_default",
          "-pruning_method", "polynomial_decay",
      ],
      "mobilenet": [
          "-model", "mobilenet",
          "-optimizer", "mobilenet_default",
      ],
      "mobilenet_polynomial_decay": [
          "-model", "mobilenet",
          "-optimizer", "mobilenet_default",
          "-pruning_method", "polynomial_decay",
      ],
  }
  _tempdir = None

  @classmethod
  def setUpClass(cls):  # pylint: disable=invalid-name
    super(KerasImagenetTest, cls).setUpClass()
    resnet_imagenet_main.define_imagenet_keras_flags()

  def setUp(self):
    super(KerasImagenetTest, self).setUp()
    # Shrink the validation set so eval is a single tiny batch.
    imagenet_preprocessing.NUM_IMAGES["validation"] = 4
    # Remember the global mixed-precision policy; fp16/bf16 tests mutate it.
    self.policy = \
        tf.keras.mixed_precision.experimental.global_policy()

  def tearDown(self):
    super(KerasImagenetTest, self).tearDown()
    tf.io.gfile.rmtree(self.get_temp_dir())
    # Restore the policy captured in setUp so tests don't leak dtype state.
    tf.keras.mixed_precision.experimental.set_policy(self.policy)

  def get_extra_flags_dict(self, flags_key):
    """Returns the per-parameter flags followed by the common defaults."""
    return self._extra_flags_dict[flags_key] + self._default_flags_dict

  def test_end_to_end_no_dist_strat(self, flags_key):
    """Test Keras model with 1 GPU, no distribution strategy."""
    extra_flags = [
        "-distribution_strategy", "off",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )

  def test_end_to_end_graph_no_dist_strat(self, flags_key):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    extra_flags = [
        "-enable_eager", "false",
        "-distribution_strategy", "off",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )

  def test_end_to_end_1_gpu(self, flags_key):
    """Test Keras model with 1 GPU."""
    if context.num_gpus() < 1:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(1, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "1",
        "-distribution_strategy", "mirrored",
        "-enable_checkpoint_and_export", "1",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )

  def test_end_to_end_1_gpu_fp16(self, flags_key):
    """Test Keras model with 1 GPU and fp16."""
    if context.num_gpus() < 1:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(1, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "1",
        "-dtype", "fp16",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    if "polynomial_decay" in extra_flags:
      self.skipTest("Pruning with fp16 is not currently supported.")

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )

  def test_end_to_end_2_gpu(self, flags_key):
    """Test Keras model with 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )

  def test_end_to_end_xla_2_gpu(self, flags_key):
    """Test Keras model with XLA and 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-enable_xla", "true",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )

  def test_end_to_end_2_gpu_fp16(self, flags_key):
    """Test Keras model with 2 GPUs and fp16."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-dtype", "fp16",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    if "polynomial_decay" in extra_flags:
      self.skipTest("Pruning with fp16 is not currently supported.")

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )

  def test_end_to_end_xla_2_gpu_fp16(self, flags_key):
    """Test Keras model with XLA, 2 GPUs and fp16."""
    if context.num_gpus() < 2:
      self.skipTest(
          "{} GPUs are not available for this test. {} GPUs are available"
          .format(2, context.num_gpus()))

    extra_flags = [
        "-num_gpus", "2",
        "-dtype", "fp16",
        "-enable_xla", "true",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)

    if "polynomial_decay" in extra_flags:
      self.skipTest("Pruning with fp16 is not currently supported.")

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )
if __name__ == "__main__":
  # Dispatch to the TensorFlow test runner when executed as a script.
  tf.test.main()
official/benchmark/models/resnet_imagenet_test_tpu.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with ImageNet data on TPU."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
absl.testing
import
parameterized
import
tensorflow
as
tf
from
official.benchmark.models
import
resnet_imagenet_main
from
official.utils.testing
import
integration
from
official.vision.image_classification.resnet
import
imagenet_preprocessing
class KerasImagenetTest(tf.test.TestCase, parameterized.TestCase):
  """Unit tests for Keras Models with ImageNet."""

  # Per-parameter flag sets, keyed by the parameterized `flags_key`.
  _extra_flags_dict = {
      "resnet": [
          "-batch_size", "4",
          "-train_steps", "1",
          "-use_synthetic_data", "true",
          # BUG FIX: a comma was missing after "true" above, so Python's
          # implicit string concatenation produced the single bogus token
          # "true-model" and silently dropped the -model flag name.
          "-model", "resnet50_v1.5",
          "-optimizer", "resnet50_default",
      ],
      "resnet_polynomial_decay": [
          "-batch_size", "4",
          "-train_steps", "1",
          "-use_synthetic_data", "true",
          "-model", "resnet50_v1.5",
          "-optimizer", "resnet50_default",
          "-pruning_method", "polynomial_decay",
      ],
  }
  _tempdir = None

  @classmethod
  def setUpClass(cls):  # pylint: disable=invalid-name
    super(KerasImagenetTest, cls).setUpClass()
    resnet_imagenet_main.define_imagenet_keras_flags()

  def setUp(self):
    super(KerasImagenetTest, self).setUp()
    # Shrink the validation set so eval is a single tiny batch.
    imagenet_preprocessing.NUM_IMAGES["validation"] = 4
    # Remember the global mixed-precision policy; the bf16 test mutates it.
    self.policy = \
        tf.keras.mixed_precision.experimental.global_policy()

  def tearDown(self):
    super(KerasImagenetTest, self).tearDown()
    tf.io.gfile.rmtree(self.get_temp_dir())
    # Restore the policy captured in setUp so tests don't leak dtype state.
    tf.keras.mixed_precision.experimental.set_policy(self.policy)

  @parameterized.parameters([
      "resnet",
      # "resnet_polynomial_decay" b/151854314
  ])
  def test_end_to_end_tpu(self, flags_key):
    """Test Keras model with TPU distribution strategy."""
    extra_flags = [
        "-distribution_strategy", "tpu",
        "-data_format", "channels_last",
        "-enable_checkpoint_and_export", "1",
    ]
    extra_flags = extra_flags + self._extra_flags_dict[flags_key]

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )

  @parameterized.parameters(["resnet"])
  def test_end_to_end_tpu_bf16(self, flags_key):
    """Test Keras model with TPU and bfloat16 activation."""
    extra_flags = [
        "-distribution_strategy", "tpu",
        "-data_format", "channels_last",
        "-dtype", "bf16",
    ]
    extra_flags = extra_flags + self._extra_flags_dict[flags_key]

    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags
    )
if __name__ == "__main__":
  # Dispatch to the TensorFlow test runner when executed as a script.
  tf.test.main()
official/benchmark/models/shakespeare/README.md
deleted
100644 → 0
View file @
8e9296ff
# Shakespeare character LSTM model
This is an implementation of a simple character LSTM used to generate text.
## Instructions
First download the source data:
```
wget https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
```
Note that files other than shakespeare.txt can also be used to train the model to generate other text.
Then train the model:
```
python
python3
shakespeare_main
.
py
--
training_data
shakespeare
.
txt
\
--
model_dir
/
tmp
/
shakespeare
```
This will place model checkpoints in
`/tmp/shakespeare`
, so that we can use them to make predictions.
Then generate predictions:
```
python
python3
shakespeare_main
.
py
--
training_data
shakespeare
.
txt
\
--
model_dir
/
tmp
/
shakespeare
--
notrain
--
predict_context
=
ROMEO
:
```
Change
`--predict_context`
and
`--predict_length`
to suit your needs.
official/benchmark/models/shakespeare/__init__.py
deleted
100644 → 0
View file @
8e9296ff
official/benchmark/models/shakespeare/shakespeare_main.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a character LSTM model trained on Shakespeare."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
functools
import
os
# pylint: disable=wrong-import-order
from
absl
import
app
from
absl
import
flags
import
numpy
as
np
import
tensorflow
as
tf
# pylint: enable=wrong-import-order
from
official.utils.flags
import
core
as
flags_core
from
official.utils.misc
import
distribution_utils
from
official.utils.misc
import
keras_utils
# Model hyperparameters for the character LSTM.
EMBEDDING_DIM = 256  # dimension of the character embedding space
RNN_UNITS = 1024     # number of units in the single LSTM layer
SEQ_LENGTH = 100     # training sequence length in characters

# Calculated by running batch_size=1
BATCHES_PER_EPOCH = 11043
def define_flags():
  """Define the flags for the Shakespeare character LSTM."""
  # Base run-level flags shared with the other official models.
  flags_core.define_base(data_dir=False,
                         clean=False,
                         train_epochs=True,
                         epochs_between_evals=False,
                         stop_threshold=False,
                         num_gpu=True,
                         export_dir=False,
                         run_eagerly=True,
                         distribution_strategy=True)
  # Performance-tuning flags (dtype, loss scaling, XLA).
  flags_core.define_performance(num_parallel_calls=False,
                                inter_op=False,
                                intra_op=False,
                                synthetic_data=False,
                                max_train_steps=False,
                                dtype=True,
                                loss_scale=True,
                                enable_xla=True)
  flags_core.set_defaults(train_epochs=43,
                          batch_size=64)

  # Model-specific flags.
  flags.DEFINE_boolean(name='enable_eager', default=True, help='Enable eager?')
  flags.DEFINE_boolean(
      name='train', default=True,
      help='If true trains the model.')
  flags.DEFINE_string(
      name='predict_context', default=None,
      help='If set, makes a prediction with the given context.')
  flags.DEFINE_integer(
      name='predict_length', default=1000,
      help='Length of the predicted text including the context.')
  flags.DEFINE_integer(name='train_steps', default=None,
                       help='Overrides train_steps per epoch if not None.')
  flags.DEFINE_integer(
      name='log_steps', default=100,
      help='For every log_steps, we log the timing information such as '
      'examples per second.')
  flags.DEFINE_string(name='training_data', default=None,
                      help='Path to file containing the training data.')
  flags.DEFINE_boolean(name='cudnn', default=True, help='Use CuDNN LSTM.')
def get_dataset(path_to_file, batch_size=None, seq_length=SEQ_LENGTH):
  """Builds a character-level training dataset from a text file.

  Args:
    path_to_file: The path to the training data.
    batch_size: Batch size to use.
    seq_length: The length of the LSTM sequence.

  Returns:
    A tuple of (dataset, idx2char, char2idx): the batched Dataset, the
    class-index-to-character array, and the character-to-class-index map.
  """
  with tf.io.gfile.GFile(path_to_file, 'rb') as source:
    corpus = source.read().decode(encoding='utf-8')

  # Build the character vocabulary and both directions of the mapping.
  vocab = sorted(set(corpus))
  char2idx = {char: idx for idx, char in enumerate(vocab)}
  idx2char = np.array(vocab)

  # Encode the text as class ids and cut it into (seq_length + 1)-character
  # chunks; each chunk becomes one (input, target) example.
  encoded = np.array([char2idx[char] for char in corpus])
  sequences = tf.data.Dataset.from_tensor_slices(encoded).batch(
      seq_length + 1, drop_remainder=True)

  def _to_example(chunk):
    # Input is the chunk without its last character; the target is the chunk
    # shifted by one, one-hot encoded over the vocabulary.
    return chunk[:-1], tf.one_hot(chunk[1:], len(vocab))

  dataset = sequences.map(_to_example)
  dataset = dataset.shuffle(10000).repeat()
  dataset = dataset.batch(batch_size, drop_remainder=True)

  return dataset, idx2char, char2idx
def build_model(vocab_size,
                embedding_dim=EMBEDDING_DIM,
                rnn_units=RNN_UNITS,
                batch_size=None,
                stateful=False,
                use_cudnn=True):
  """Builds the Shakespeare model.

  Args:
    vocab_size: The number of character classes in the input.
    embedding_dim: The dimension of the embedding space for each class.
    rnn_units: The number of RNN units in the layer.
    batch_size: When predicting, the batch size of the predictions.
    stateful: If true, the LSTM is stateful.
    use_cudnn: If true, keep the stock 'tanh' activation so the V2 LSTM can
      dispatch to its CuDNN implementation.

  Returns:
    A Keras Model.
  """
  lstm_layer = functools.partial(tf.keras.layers.LSTM, implementation=2)

  # By indirecting the activation through a lambda layer, the logic to dispatch
  # to CuDNN in V2 doesn't trigger and we force the LSTM to run in non-CuDNN
  # mode.
  activation = 'tanh' if use_cudnn else (lambda x: tf.math.tanh(x))

  # Stateful prediction requires a fixed batch dimension; training uses None.
  batch_shape = [batch_size if stateful else None, None]

  layers = [
      tf.keras.layers.Embedding(vocab_size, embedding_dim,
                                batch_input_shape=batch_shape),
      lstm_layer(rnn_units,
                 activation=activation,
                 return_sequences=True,
                 stateful=stateful,
                 recurrent_initializer='glorot_uniform'),
      tf.keras.layers.Dense(vocab_size),
      # Softmax in float32 keeps the output numerically stable under
      # mixed-precision policies.
      tf.keras.layers.Softmax(dtype=tf.float32),
  ]
  return tf.keras.Sequential(layers)
def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
  """Trains a Shakespeare model.

  Args:
    flags_obj: An object containing parsed flag values.
    dataset: the training data set.
    vocab_size: the number of unique character classes.
    strategy: distribution strategy to use.
    checkpoint_dir: if not None, the directory in which to make checkpoints.

  Returns:
    The training history and callbacks.
  """
  # train_steps flag overrides the derived per-epoch step count.
  if flags_obj.train_steps:
    train_steps = flags_obj.train_steps
  else:
    train_steps = BATCHES_PER_EPOCH // flags_obj.batch_size
  strategy_scope = distribution_utils.get_strategy_scope(strategy)

  with strategy_scope:
    model = build_model(vocab_size=vocab_size, batch_size=flags_obj.batch_size,
                        use_cudnn=flags_obj.cudnn)
    # When keras_use_ctl is False, Model.fit() automatically applies
    # loss scaling so we don't need to create a LossScaleOptimizer.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=[tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
                 tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
        run_eagerly=flags_obj.run_eagerly)

  callbacks = []
  if checkpoint_dir:
    # One weights-only checkpoint per epoch, named by epoch number.
    checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_prefix,
        save_weights_only=True)
    callbacks.append(checkpoint_callback)
  # Log examples/sec every `log_steps` batches.
  time_callback = keras_utils.TimeHistory(flags_obj.batch_size,
                                          flags_obj.log_steps)
  callbacks.append(time_callback)

  history = model.fit(dataset,
                      epochs=flags_obj.train_epochs,
                      steps_per_epoch=train_steps,
                      callbacks=callbacks,
                      verbose=2)
  return history, callbacks
def make_prediction(checkpoint_dir, length, context, idx2char, char2idx):
  """Make predictions from a Shakespeare model.

  Args:
    checkpoint_dir: the directory from which to load checkpoints
    length: the total length of the generated text (including the context).
    context: the initial text with which the LSTM is primed.
    idx2char: the character class to character mapping.
    char2idx: the character to character class mapping.

  Returns:
    A generated string of text of the given length.
  """
  # batch_size=1 and stateful=True: generation feeds one character at a time
  # and relies on the LSTM carrying state between calls.
  prediction_model = build_model(
      vocab_size=len(idx2char), batch_size=1, stateful=True)
  prediction_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
  prediction_model.build(tf.TensorShape([1, None]))

  # Encode the priming context as class ids with a leading batch dimension.
  input_eval = [char2idx[s] for s in context]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  prediction_model.reset_states()
  # Only generate the characters beyond the supplied context.
  for _ in range(length - len(context)):
    predictions = prediction_model(input_eval)
    predictions = tf.squeeze(predictions, 0)

    # We applied a softmax to the output of the model so that
    # tf.keras.metrics.Recall would work. We need logits for
    # tf.random.categorical, so we convert the probabilities back to log odds
    predictions = tf.math.log(predictions / (1 - predictions))

    random_output = tf.random.categorical(predictions, num_samples=1)
    # Sample from the distribution of the most recent timestep.
    selected_id = random_output[-1, 0].numpy()
    # The sampled character becomes the next input.
    input_eval = tf.expand_dims([selected_id], 0)
    text_generated.append(idx2char[selected_id])

  return context + ''.join(text_generated)
def run(flags_obj):
  """Run Shakespeare training and predict.

  Args:
    flags_obj: An object containing parsed flag values.

  Returns:
    Dictionary with status from the run.

  Raises:
    ValueError: if no training data path is set, or if prediction is requested
      without a model_dir to load checkpoints from.
  """
  if not flags_obj.training_data:
    raise ValueError(
        'Must set the path to a training data file. e.g download the following '
        'https://storage.googleapis.com/download.tensorflow.org/data/'
        'shakespeare.txt')

  if flags_obj.dtype == 'fp16':
    # Enable mixed precision globally before any model is built.
    policy = tf.keras.mixed_precision.experimental.Policy(
        'mixed_float16',
        loss_scale=flags_core.get_loss_scale(flags_obj,
                                             default_for_fp16='dynamic'))
    tf.keras.mixed_precision.experimental.set_policy(policy)

  keras_utils.set_session_config(enable_xla=flags_obj.enable_xla)

  strategy = distribution_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus)

  dataset, idx2char, char2idx = get_dataset(flags_obj.training_data,
                                            batch_size=flags_obj.batch_size)
  stats = {}
  if flags_obj.train:
    history, callbacks = train_model(flags_obj, dataset,
                                     len(idx2char), strategy,
                                     checkpoint_dir=flags_obj.model_dir)

    stats['history'] = history.history
    stats['callbacks'] = callbacks

  if flags_obj.predict_context:
    # Prediction loads weights from model_dir, so it must be set even if
    # training just ran.
    if not flags_obj.model_dir:
      raise ValueError('Must set model_dir to get predictions.')
    print(make_prediction(flags_obj.model_dir,
                          flags_obj.predict_length,
                          flags_obj.predict_context,
                          idx2char,
                          char2idx))

  return stats
def main(_):
  """absl.app entry point: forwards the parsed global flags to run()."""
  run(flags.FLAGS)
if __name__ == '__main__':
  # Register the script's flags before absl parses argv and invokes main.
  define_flags()
  app.run(main)
official/benchmark/models/synthetic_util.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions to generate data directly on devices."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
random
import
string
from
absl
import
logging
import
tensorflow
as
tf
# The `SyntheticDataset` is a temporary solution for generating synthetic data
# directly on devices. It is only useful for Keras with Distribution
# Strategies. We will have better support in `tf.data` or Distribution Strategy
# later.
class SyntheticDataset(object):
  """A dataset that generates synthetic data on each device."""

  def __init__(self, dataset, split_by=1):
    """Captures one element of `dataset` into local variables.

    Args:
      dataset: a tf.data.Dataset to sample a single element from.
      split_by: divide the captured batch by this factor (e.g. number of
        replicas) so each device sees a per-replica-sized batch.
    """
    # dataset.take(1) doesn't have GPU kernel.
    with tf.device('device:CPU:0'):
      tensor = tf.data.experimental.get_single_element(dataset.take(1))
    flat_tensor = tf.nest.flatten(tensor)
    variable_data = []
    initializers = []
    for t in flat_tensor:
      # Keep only the first shard so the stored batch matches the
      # per-replica batch size.
      rebatched_t = tf.split(t, num_or_size_splits=split_by, axis=0)[0]
      assert rebatched_t.shape.is_fully_defined(), rebatched_t.shape
      # Local (non-trainable, per-device) variables hold the frozen batch;
      # random names avoid collisions across instances.
      v = tf.compat.v1.get_local_variable(self._random_name(),
                                          initializer=rebatched_t)
      variable_data.append(v)
      initializers.append(v.initializer)
    # Re-assemble the variables into the original nested structure.
    input_data = tf.nest.pack_sequence_as(tensor, variable_data)
    self._iterator = SyntheticIterator(input_data, initializers)

  def _random_name(self, size=10, chars=string.ascii_uppercase + string.digits):
    # Random uppercase+digit suffix used as a unique variable name.
    return ''.join(random.choice(chars) for _ in range(size))

  def __iter__(self):
    return self._iterator

  # The two methods below mirror the tf.compat.v1 dataset API so this class
  # can stand in for a distributed dataset in either API style.
  def make_one_shot_iterator(self):
    return self._iterator

  def make_initializable_iterator(self):
    return self._iterator
class SyntheticIterator(object):
  """A dataset that generates synthetic data on each device."""

  def __init__(self, input_data, initializers):
    """Stores the frozen batch and the variable initializers backing it."""
    self._input_data = input_data
    self._initializers = initializers

  def get_next(self):
    """Returns the same captured batch on every call."""
    return self._input_data

  def next(self):
    """Python 2 style alias for __next__."""
    return self.__next__()

  def __next__(self):
    try:
      return self.get_next()
    except tf.errors.OutOfRangeError:
      # Translate the TF end-of-sequence error into the Python protocol.
      raise StopIteration

  def initialize(self):
    """Returns ops that initialize the backing variables (graph mode only)."""
    if tf.executing_eagerly():
      return tf.no_op()
    return self._initializers
def
_monkey_patch_dataset_method
(
strategy
):
"""Monkey-patch `strategy`'s `make_dataset_iterator` method."""
def
make_dataset
(
self
,
dataset
):
logging
.
info
(
'Using pure synthetic data.'
)
with
self
.
scope
():
if
self
.
extended
.
_global_batch_size
:
# pylint: disable=protected-access
return
SyntheticDataset
(
dataset
,
self
.
num_replicas_in_sync
)
else
:
return
SyntheticDataset
(
dataset
)
def
make_iterator
(
self
,
dataset
):
dist_dataset
=
make_dataset
(
self
,
dataset
)
return
iter
(
dist_dataset
)
strategy
.
orig_make_dataset_iterator
=
strategy
.
make_dataset_iterator
strategy
.
make_dataset_iterator
=
make_iterator
strategy
.
orig_distribute_dataset
=
strategy
.
experimental_distribute_dataset
strategy
.
experimental_distribute_dataset
=
make_dataset
def
_undo_monkey_patch_dataset_method
(
strategy
):
if
hasattr
(
strategy
,
'orig_make_dataset_iterator'
):
strategy
.
make_dataset_iterator
=
strategy
.
orig_make_dataset_iterator
if
hasattr
(
strategy
,
'orig_distribute_dataset'
):
strategy
.
make_dataset_iterator
=
strategy
.
orig_distribute_dataset
def set_up_synthetic_data():
  """Patches the supported distribution strategies to emit synthetic data."""
  for strategy_cls in (
      tf.distribute.OneDeviceStrategy,
      tf.distribute.MirroredStrategy,
      tf.distribute.experimental.MultiWorkerMirroredStrategy):
    _monkey_patch_dataset_method(strategy_cls)
def undo_set_up_synthetic_data():
  """Reverts the strategy patches installed by set_up_synthetic_data."""
  for strategy_cls in (
      tf.distribute.OneDeviceStrategy,
      tf.distribute.MirroredStrategy,
      tf.distribute.experimental.MultiWorkerMirroredStrategy):
    _undo_monkey_patch_dataset_method(strategy_cls)
official/benchmark/ncf_keras_benchmark.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
time
from
absl
import
flags
from
absl
import
logging
from
absl.testing
import
flagsaver
import
tensorflow
as
tf
from
official.benchmark
import
benchmark_wrappers
from
official.benchmark
import
owner_utils
from
official.benchmark.perfzero_benchmark
import
PerfZeroBenchmark
from
official.recommendation
import
ncf_common
from
official.recommendation
import
ncf_keras_main
from
official.utils.flags
import
core
FLAGS
=
flags
.
FLAGS
NCF_DATA_DIR_NAME
=
'movielens_data'
NCF_TF_REGRESSION_DATA_DIR_NAME
=
'gs://tf-regression/ncf/data'
class NCFKerasBenchmarkBase(PerfZeroBenchmark):
  """Base class for NCF model benchmark."""

  def __init__(self, output_dir=None, default_flags=None, **kwargs):
    super(NCFKerasBenchmarkBase, self).__init__(output_dir, default_flags,
                                                **kwargs)
    # Run all benchmarks with ml_perf flag.
    # NOTE(review): assumes the base __init__ leaves self.default_flags as a
    # dict even when default_flags is None — confirm in PerfZeroBenchmark.
    self.default_flags['ml_perf'] = True

  def _setup(self):
    """Sets up and resets flags before each test."""
    logging.set_verbosity(logging.INFO)
    # NOTE(review): relies on `local_flags` existing (presumably initialized
    # to None) on the PerfZeroBenchmark base class — confirm.
    if NCFKerasBenchmarkBase.local_flags is None:
      ncf_common.define_ncf_flags()
      # Loads flags to get defaults to then override. List cannot be empty.
      flags.FLAGS(['foo'])
      core.set_defaults(**self.default_flags)
      # Snapshot the flag state once so later tests restore it cheaply.
      saved_flag_values = flagsaver.save_flag_values()
      NCFKerasBenchmarkBase.local_flags = saved_flag_values
    else:
      flagsaver.restore_flag_values(NCFKerasBenchmarkBase.local_flags)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self, hr_at_10_min=0, hr_at_10_max=0):
    """Runs NCF training and reports wall time plus quality metrics.

    Args:
      hr_at_10_min: if > 0, also report hr@10 with this minimum passing value.
      hr_at_10_max: maximum passing hr@10; only used when hr_at_10_min > 0.
    """
    start_time_sec = time.time()
    stats = ncf_keras_main.run_ncf(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    metrics = []
    metrics.append({'name': 'exp_per_second',
                    'value': stats['avg_exp_per_second']})

    if hr_at_10_min > 0:
      metrics.append({'name': 'hr_at_10',
                      'value': stats['eval_hit_rate'],
                      'min_value': hr_at_10_min,
                      'max_value': hr_at_10_max})

      # NOTE(review): train_loss is only reported when an hr@10 range is
      # requested — presumably intentional, confirm against original file.
      metrics.append({'name': 'train_loss',
                      'value': stats['loss']})

    self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)
class NCFKerasAccuracy(NCFKerasBenchmarkBase):
  """Benchmark NCF model using real data."""

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):
    root_data_dir = root_data_dir if root_data_dir else ''

    # NOTE(review): the incoming default_flags argument is discarded and
    # rebuilt from scratch here — presumably intentional; confirm.
    default_flags = {}
    default_flags['dataset'] = 'ml-20m'
    default_flags['num_gpus'] = 1
    default_flags['train_epochs'] = 10
    default_flags['clean'] = True
    default_flags['batch_size'] = 99000
    default_flags['learning_rate'] = 0.00382059
    default_flags['beta1'] = 0.783529
    default_flags['beta2'] = 0.909003
    default_flags['epsilon'] = 1.45439e-07
    default_flags['layers'] = [256, 256, 128, 64]
    default_flags['num_factors'] = 64
    default_flags['hr_threshold'] = 0.635
    default_flags['ml_perf'] = True
    default_flags['use_synthetic_data'] = False
    default_flags['data_dir'] = os.path.join(root_data_dir, NCF_DATA_DIR_NAME)

    super(NCFKerasAccuracy, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def _run_and_report_benchmark_mlperf_like(self):
    """Run test and report results.

    Note: MLPerf like tests are not tuned to hit a specific hr@10 value, but
    we want it recorded.
    """
    self._run_and_report_benchmark(hr_at_10_min=0.61)

  def _run_and_report_benchmark(self, hr_at_10_min=0.630, hr_at_10_max=0.645):
    """Run test and report results.

    Note: Target is 0.635, but some runs are below that level. Until we have
    multi-run tests, we have to accept a lower target.

    Args:
      hr_at_10_min: Minimum acceptable hr@10 value.
      hr_at_10_max: Maximum acceptable hr@10 value.
    """
    super(NCFKerasAccuracy, self)._run_and_report_benchmark(
        hr_at_10_min=hr_at_10_min,
        hr_at_10_max=hr_at_10_max)

  def _set_8_gpu_defaults(self):
    """Applies the shared 8-GPU hyperparameters and tf.data input paths."""
    FLAGS.num_gpus = 8
    FLAGS.learning_rate = 0.0045
    FLAGS.beta1 = 0.25
    FLAGS.beta2 = 0.5
    FLAGS.epsilon = 1e-8
    FLAGS.train_epochs = 14
    FLAGS.batch_size = 99000
    FLAGS.eval_batch_size = 160000
    FLAGS.train_dataset_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME,
                                            'training_cycle_*/*')
    FLAGS.eval_dataset_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME,
                                           'eval_data/*')
    FLAGS.input_meta_data_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME,
                                              'metadata')
    FLAGS.data_dir = NCF_TF_REGRESSION_DATA_DIR_NAME

  def benchmark_1_gpu_early_stop(self):
    """1 GPU accuracy run with early stopping."""
    self._setup()
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_early_stop(self):
    """1 GPU without distribution strategy, with early stopping."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
    """1 GPU, eager execution, no distribution strategy, early stopping."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_early_stop(self):
    """1 GPU with XLA and early stopping."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_early_stop(self):
    """1 GPU with custom training loop and early stopping."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_run_eagerly_early_stop(self):
    """1 GPU, custom training loop, eager execution, early stopping."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_ctl_early_stop(self):
    """1 GPU, custom training loop, XLA, early stopping."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_2_gpus_early_stop(self):
    """2 GPUs with early stopping."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    FLAGS.eval_batch_size = 160000
    self._run_and_report_benchmark()

  def benchmark_2_gpus_ctl_early_stop(self):
    """NCF with custom training loop. Works only in TF 2.0."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    FLAGS.eval_batch_size = 160000
    self._run_and_report_benchmark()

#############################################
# Tests below with mlperf in the test name are of two types:
# 1) 1 GPU tests are based on MLPerf 0.5 and the TensorFlow pulled submission.
# 2) 8 GPU tests are based on MLPerf 0.5 and use NVIDIA's hyper parameters.
#
# The purpose of both is to get a number to compare to existing results. To do
# this the number of epochs is held constant rather than a race to a given
# accuracy. The accuracy validation is done by the "early_stop" tests.
#############################################

  def benchmark_1_gpu_mlperf_like(self):
    """1 GPU using keras fit/compile."""
    self._setup()
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
    """1 GPU using compile/fit without dist_strat."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_mlperf_like(self):
    """1 GPU using compile/fit, eager, without dist_strat."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_mlperf_like(self):
    """1 GPU using compile/fit with XLA."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.enable_xla = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_mlperf_like(self):
    """1 GPU using CTL."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_fp16_mlperf_like(self):
    """1 GPU using CTL and FP16."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.train_epochs = 7
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_fp16_mlperf_like(self):
    """1 GPU using FP16."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_fp16_graph_rewrite_mlperf_like(self):
    """1 GPU using CTL and FP16 graph rewrite."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.train_epochs = 7
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_fp16_graph_rewrite_mlperf_like(self):
    """1 GPU using FP16 graph rewrite."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
    """1 GPU using CTL with eager and distribution strategy."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.run_eagerly = True
    FLAGS.train_epochs = 7
    # NOTE(review): unlike the sibling *_mlperf_like tests this calls the
    # strict-accuracy reporter instead of _run_and_report_benchmark_mlperf_like
    # — possibly unintentional; confirm before relying on its pass range.
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_ctl_mlperf_like(self):
    """1 GPU using CTL with XLA."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.enable_xla = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_fp16_mlperf_like(self):
    """1 GPU using with XLA and FP16."""
    self._setup()
    FLAGS.enable_xla = True
    FLAGS.train_epochs = 7
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_ctl_fp16_mlperf_like(self):
    """1 GPU using CTL with XLA and FP16."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.enable_xla = True
    FLAGS.train_epochs = 7
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_mlperf_like(self):
    """8 GPU using keras fit/compile."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.train_epochs = 17
    FLAGS.batch_size = 1048576
    FLAGS.eval_batch_size = 160000
    FLAGS.learning_rate = 0.0045
    FLAGS.beta1 = 0.25
    FLAGS.beta2 = 0.5
    FLAGS.epsilon = 1e-8
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_ctl_mlperf_like(self):
    """8 GPU using CTL."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.num_gpus = 8
    FLAGS.train_epochs = 17
    FLAGS.batch_size = 1048576
    FLAGS.eval_batch_size = 160000
    FLAGS.learning_rate = 0.0045
    FLAGS.beta1 = 0.25
    FLAGS.beta2 = 0.5
    FLAGS.epsilon = 1e-8
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_tf_data_ctl_mlperf_like(self):
    """8 GPU using CTL."""
    self._setup()
    self._set_8_gpu_defaults()
    FLAGS.keras_use_ctl = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_tf_data_fp16_mlperf_like(self):
    """8 GPU FP16."""
    self._setup()
    self._set_8_gpu_defaults()
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_tf_data_ctl_fp16_mlperf_like(self):
    """8 GPU FP16 using CTL."""
    self._setup()
    self._set_8_gpu_defaults()
    FLAGS.keras_use_ctl = True
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_tf_data_ctl_fp16_graph_rewrite_mlperf_like(self):
    """8 GPU FP16 graph rewrite using CTL."""
    self._setup()
    self._set_8_gpu_defaults()
    FLAGS.keras_use_ctl = True
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()
class NCFKerasBenchmarkReal(NCFKerasBenchmarkBase):
  """NCF Keras throughput benchmarks."""

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):
    root_data_dir = root_data_dir if root_data_dir else ''
    # NOTE(review): the incoming default_flags argument is discarded and
    # rebuilt from scratch here — presumably intentional; confirm.
    default_flags = {}
    default_flags['dataset'] = 'ml-20m'
    default_flags['num_gpus'] = 1
    default_flags['train_epochs'] = 14
    default_flags['clean'] = True
    default_flags['batch_size'] = 99000
    default_flags['eval_batch_size'] = 160000
    default_flags['learning_rate'] = 0.00382059
    default_flags['beta1'] = 0.783529
    default_flags['beta2'] = 0.909003
    default_flags['epsilon'] = 1.45439e-07
    default_flags['layers'] = [256, 256, 128, 64]
    default_flags['num_factors'] = 64
    default_flags['hr_threshold'] = 0.635
    default_flags['ml_perf'] = True
    default_flags['use_synthetic_data'] = False
    # Pre-materialized tf.data inputs from the regression GCS bucket.
    default_flags['train_dataset_path'] = os.path.join(
        NCF_TF_REGRESSION_DATA_DIR_NAME, 'training_cycle_*/*')
    default_flags['eval_dataset_path'] = os.path.join(
        NCF_TF_REGRESSION_DATA_DIR_NAME, 'eval_data/*')
    default_flags['input_meta_data_path'] = os.path.join(
        NCF_TF_REGRESSION_DATA_DIR_NAME, 'metadata')
    default_flags['data_dir'] = NCF_TF_REGRESSION_DATA_DIR_NAME

    super(NCFKerasBenchmarkReal, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def benchmark_2x2_tpu(self):
    """2x2 TPU using CTL with distribution strategy."""
    self._setup()
    FLAGS.distribution_strategy = 'tpu'
    FLAGS.keras_use_ctl = True
    FLAGS.num_gpus = 0
    FLAGS.train_epochs = 1
    self._run_and_report_benchmark()

  @owner_utils.Owner('tf-graph-compiler')
  def benchmark_2x2_tpu_mlir(self):
    """2x2 TPU using CTL with distribution strategy using the MLIR bridge."""
    self._setup()
    FLAGS.distribution_strategy = 'tpu'
    FLAGS.keras_use_ctl = True
    FLAGS.num_gpus = 0
    FLAGS.train_epochs = 1
    # Switch TF's TPU compilation path to the MLIR bridge before running.
    tf.config.experimental.enable_mlir_bridge()
    self._run_and_report_benchmark()
class NCFKerasSynth(NCFKerasBenchmarkBase):
  """Benchmark NCF model using synthetic data."""

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    # NOTE(review): the incoming default_flags argument is discarded and
    # rebuilt from scratch here — presumably intentional; confirm.
    default_flags = {}
    default_flags['dataset'] = 'ml-20m'
    default_flags['num_gpus'] = 1
    default_flags['train_epochs'] = 8
    default_flags['batch_size'] = 99000
    default_flags['eval_batch_size'] = 160000
    default_flags['learning_rate'] = 0.00382059
    default_flags['beta1'] = 0.783529
    default_flags['beta2'] = 0.909003
    default_flags['epsilon'] = 1.45439e-07
    default_flags['layers'] = [256, 256, 128, 64]
    default_flags['num_factors'] = 64
    default_flags['hr_threshold'] = 0.635
    # Synthetic data: measures throughput without the input pipeline.
    default_flags['use_synthetic_data'] = True

    super(NCFKerasSynth, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def benchmark_1_gpu(self):
    """1 GPU throughput on synthetic data."""
    self._setup()
    self._run_and_report_benchmark()

  def benchmark_2_gpus(self):
    """2 GPU throughput on synthetic data."""
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()
if __name__ == '__main__':
  # tf.test.main discovers and runs the benchmark classes defined above.
  tf.test.main()
official/benchmark/nhnet_benchmark.py
deleted
100644 → 0
View file @
8e9296ff
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes benchmark testing for bert pretraining."""
# pylint: disable=line-too-long
from
__future__
import
print_function
import
time
from
typing
import
Optional
from
absl
import
flags
import
tensorflow
as
tf
from
official.benchmark
import
benchmark_wrappers
from
official.benchmark
import
owner_utils
from
official.benchmark
import
perfzero_benchmark
from
official.nlp.nhnet
import
trainer
from
official.utils.flags
import
core
as
flags_core
MIN_LOSS
=
0.40
MAX_LOSS
=
0.55
NHNET_DATA
=
'gs://tf-perfzero-data/nhnet/v1/processed/train.tfrecord*'
PRETRAINED_CHECKPOINT_PATH
=
'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12/bert_model.ckpt'
FLAGS
=
flags
.
FLAGS
class NHNetBenchmark(perfzero_benchmark.PerfZeroBenchmark):
  """Base benchmark class for NHNet."""

  def __init__(self,
               output_dir=None,
               default_flags=None,
               tpu=None,
               **kwargs):
    self.default_flags = default_flags or {}
    # NOTE(review): trainer.define_flags() is called for its side effect of
    # registering flags; its return value is forwarded as flag_methods —
    # confirm it returns what PerfZeroBenchmark expects.
    flag_methods = trainer.define_flags()
    super(NHNetBenchmark, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        flag_methods=flag_methods,
        tpu=tpu,
        **kwargs)

  def _report_benchmark(self, stats, wall_time_sec, max_value=None,
                        min_value=None):
    """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from keras models with known entries.
      wall_time_sec: the duration of the benchmark execution in seconds
      max_value: highest passing level.
      min_value: lowest passing level.
    """
    metrics = []
    metrics.append({'name': 'training_loss',
                    'value': stats['training_loss'],
                    'min_value': min_value,
                    'max_value': max_value})
    # These metrics are placeholders to avoid PerfZero failure.
    metrics.append({'name': 'exp_per_second',
                    'value': 0.0,})
    metrics.append({'name': 'startup_time',
                    'value': 9999.,})
    flags_str = flags_core.get_nondefault_flags_as_str()
    self.report_benchmark(
        iters=-1,
        wall_time=wall_time_sec,
        metrics=metrics,
        extras={'flags': flags_str})
class NHNetAccuracyBenchmark(NHNetBenchmark):
  """Benchmark accuracy tests for NHNet."""

  def __init__(self,
               output_dir: Optional[str] = None,
               tpu: Optional[str] = None,
               **kwargs):
    # Training configuration shared by all accuracy runs in this class.
    default_flags = dict(
        mode='train',
        train_file_pattern=NHNET_DATA,
        train_batch_size=1024,
        model_type='nhnet',
        len_title=15,
        len_passage=200,
        num_encoder_layers=12,
        num_decoder_layers=12,
        num_nhnet_articles=5,
        steps_per_loop=1000,
        params_override='init_from_bert2bert=false')
    super(NHNetAccuracyBenchmark, self).__init__(
        output_dir=output_dir, default_flags=default_flags, tpu=tpu, **kwargs)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                max_value=MAX_LOSS,
                                min_value=MIN_LOSS):
    """Runs and reports the benchmark given the provided configuration."""
    start_time_sec = time.time()
    stats = trainer.run()
    wall_time_sec = time.time() - start_time_sec
    self._report_benchmark(stats, wall_time_sec,
                           max_value=max_value, min_value=min_value)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_accuracy_4x4_tpu_f32_50k_steps(self):
    """Test bert pretraining with 4x4 TPU for 50k steps."""
    # This is used for accuracy test.
    self._setup()
    FLAGS.train_steps = 50000
    # Checkpoint only once, at the end of training.
    FLAGS.checkpoint_interval = FLAGS.train_steps
    FLAGS.distribution_strategy = 'tpu'
    FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH
    # NOTE(review): model dir says 'bf32' while the method name says 'f32' —
    # likely a historical naming mismatch; do not "fix" without checking
    # dashboards keyed on this string.
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_accuracy_4x4_tpu_bf32_50k_steps')
    self._run_and_report_benchmark()

  @owner_utils.Owner('tf-model-garden')
  def benchmark_accuracy_4x4_tpu_f32_1k_steps(self):
    """Test bert pretraining with 4x4 TPU for 1k steps."""
    self._setup()
    FLAGS.train_steps = 1000
    FLAGS.checkpoint_interval = FLAGS.train_steps
    FLAGS.distribution_strategy = 'tpu'
    # NOTE(review): same 'bf32' vs 'f32' naming mismatch as the 50k variant.
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_accuracy_4x4_tpu_bf32_1k_steps')
    self._run_and_report_benchmark()
if __name__ == '__main__':
  # tf.test.main discovers and runs the benchmark classes defined above.
  tf.test.main()
official/benchmark/owner_utils.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils to set Owner annotations on benchmarks.
@owner_utils.Owner('owner_team/user') can be set either at the benchmark class
level / benchmark method level or both.
Runner frameworks can use owner_utils.GetOwner(benchmark_method) to get the
actual owner. Python inheritance for the owner attribute is respected. (E.g
method level owner takes precedence over class level).
See owner_utils_test for associated tests and more examples.
The decorator can be applied both at the method level and at the class level.
Simple example:
===============
class MLBenchmark:
@Owner('example_id')
def benchmark_method_1_gpu(self):
return True
"""
def Owner(owner_name):
  """Sets the owner attribute on a decorated method or class."""

  def _annotate(target):
    """Stamps `target` with the benchmark owner and returns it unchanged."""
    target.__benchmark__owner__ = owner_name
    return target

  return _annotate
def GetOwner(benchmark_method_or_class):
  """Gets the inherited owner attribute for this benchmark.

  Checks for existence of __benchmark__owner__. If it's not present, looks for
  it in the parent class's attribute list.

  Args:
    benchmark_method_or_class: A benchmark method or class.

  Returns:
    string - the associated owner if present / None.
  """
  target = benchmark_method_or_class
  # Direct annotation (decorated function/class, or method-level tag).
  if hasattr(target, '__benchmark__owner__'):
    return target.__benchmark__owner__
  # Bound method: fall back to the class-level tag via the bound instance.
  if hasattr(target, '__self__') and hasattr(target.__self__,
                                             '__benchmark__owner__'):
    return target.__self__.__benchmark__owner__
  return None
official/benchmark/owner_utils_test.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for official.benchmark.owner_utils."""
from
absl.testing
import
absltest
from
official.benchmark
import
owner_utils
@owner_utils.Owner('static_owner')
def static_function(foo=5):
  """Module-level fixture function tagged with a static owner."""
  return foo
def static_function_without_owner(foo=5):
  """Module-level fixture function that carries no owner tag."""
  return foo
class BenchmarkClassWithoutOwner:
  """Fixture class with no class-level owner tag."""

  def method_without_owner(self):
    """Untagged method; GetOwner should resolve to None."""
    return 100

  @owner_utils.Owner('method_owner')
  def method_with_owner(self):
    """Method carrying its own owner tag."""
    return 200
@owner_utils.Owner('class_owner')
class SomeBenchmarkClass:
  """Fixture class tagged with a class-level owner."""

  def method_inherited_owner(self):
    """Untagged method; inherits the class-level owner."""
    return 123

  @owner_utils.Owner('method_owner')
  def method_override_owner(self):
    """Method whose own tag overrides the class-level owner."""
    return 345
@owner_utils.Owner('new_class_owner')
class InheritedClass(SomeBenchmarkClass):
  """Subclass fixture that re-tags both the class and one method."""

  def method_inherited_owner(self):
    """Untagged override; picks up the subclass's class-level owner."""
    return 456

  @owner_utils.Owner('new_method_owner')
  def method_override_owner(self):
    """Override carrying its own, new owner tag."""
    return 567
class OwnerUtilsTest(absltest.TestCase):
  """Tests to assert for owner decorator functionality."""

  def test_owner_tag_missing(self):
    """Untagged functions/methods resolve to no owner."""
    # assertIsNone is the idiomatic unittest check (was assertEqual(None, x)).
    self.assertIsNone(owner_utils.GetOwner(static_function_without_owner))
    benchmark_class = BenchmarkClassWithoutOwner()
    self.assertIsNone(
        owner_utils.GetOwner(benchmark_class.method_without_owner))
    self.assertEqual(100, benchmark_class.method_without_owner())
    self.assertEqual('method_owner',
                     owner_utils.GetOwner(benchmark_class.method_with_owner))
    self.assertEqual(200, benchmark_class.method_with_owner())

  def test_owner_attributes_static(self):
    """Module-level function decorated directly."""
    self.assertEqual('static_owner', owner_utils.GetOwner(static_function))
    self.assertEqual(5, static_function(5))

  def test_owner_attributes_per_class(self):
    """Class-level tag is inherited; method-level tag overrides it."""
    level1 = SomeBenchmarkClass()
    self.assertEqual('class_owner',
                     owner_utils.GetOwner(level1.method_inherited_owner))
    self.assertEqual(123, level1.method_inherited_owner())
    self.assertEqual('method_owner',
                     owner_utils.GetOwner(level1.method_override_owner))
    self.assertEqual(345, level1.method_override_owner())

  def test_owner_attributes_inherited_class(self):
    """Subclass re-tagging replaces both class- and method-level owners."""
    level2 = InheritedClass()
    self.assertEqual('new_class_owner',
                     owner_utils.GetOwner(level2.method_inherited_owner))
    self.assertEqual(456, level2.method_inherited_owner())
    self.assertEqual('new_method_owner',
                     owner_utils.GetOwner(level2.method_override_owner))
    self.assertEqual(567, level2.method_override_owner())
# Script entry point: run the test suite under absl's test runner.
if __name__ == '__main__':
  absltest.main()
official/benchmark/perfzero_benchmark.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils for creating PerfZero benchmarks."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
from
absl
import
flags
from
absl
import
logging
from
absl.testing
import
flagsaver
import
tensorflow
as
tf
# Module-level alias to the global absl flag registry.
FLAGS = flags.FLAGS
class PerfZeroBenchmark(tf.test.Benchmark):
  """Common methods used in PerfZero Benchmarks.

  Handles the resetting of flags between tests, loading of default_flags,
  overriding of defaults. PerfZero (OSS) runs each test in a separate
  process reducing some need to reset the flags.
  """
  # Class-level cache of saved flag values, shared by all tests in a process.
  local_flags = None

  def __init__(self,
               output_dir=None,
               default_flags=None,
               root_data_dir=None,
               flag_methods=None,
               tpu=None):
    """Initialize class.

    Args:
      output_dir: Base directory to store all output for the test.
      default_flags: Set of flags to pass to model.
      root_data_dir: Optional param used by child classes to look for the
        dataset.
      flag_methods: Set of flag methods to run during setup.
      tpu: (optional) TPU name to use in a TPU benchmark.
    """
    # The BENCHMARK_OUTPUT_DIR environment variable always wins over the
    # constructor argument so the PerfZero harness can redirect output.
    if os.getenv('BENCHMARK_OUTPUT_DIR'):
      self.output_dir = os.getenv('BENCHMARK_OUTPUT_DIR')
    elif output_dir:
      self.output_dir = output_dir
    else:
      self.output_dir = '/tmp'
    self.default_flags = default_flags or {}
    # Fix: flag_methods is a sequence of callables, so the empty default
    # should be a list, not a dict (iterating either yields nothing, but
    # the list matches the declared intent and supports append()).
    self.flag_methods = flag_methods or []

    # Same precedence as output_dir: environment variable, then argument.
    if os.getenv('BENCHMARK_TPU'):
      resolved_tpu = os.getenv('BENCHMARK_TPU')
    elif tpu:
      resolved_tpu = tpu
    else:
      resolved_tpu = None

    if resolved_tpu:
      # TPU models are expected to accept a --tpu=name flag. PerfZero creates
      # the TPU at runtime and passes the TPU's name to this flag.
      self.default_flags['tpu'] = resolved_tpu
    logging.info('root_data_dir: %s', root_data_dir)

  @property
  def tpu(self):
    """Name of the TPU to use, or None when not running on TPU."""
    return self.default_flags.get('tpu', None)

  def _get_model_dir(self, folder_name):
    """Returns directory to store info, e.g. saved model and event log."""
    return os.path.join(self.output_dir, folder_name)

  def _setup(self):
    """Sets up and resets flags before each test."""
    logging.set_verbosity(logging.INFO)
    if PerfZeroBenchmark.local_flags is None:
      for flag_method in self.flag_methods:
        flag_method()
      # Loads flags to get defaults to then override. List cannot be empty.
      flags.FLAGS(['foo'])
      # Overrides flag values with defaults for the class of tests.
      for k, v in self.default_flags.items():
        setattr(FLAGS, k, v)
      saved_flag_values = flagsaver.save_flag_values()
      PerfZeroBenchmark.local_flags = saved_flag_values
    else:
      # Subsequent tests in the same process: restore the cached snapshot.
      flagsaver.restore_flag_values(PerfZeroBenchmark.local_flags)
official/benchmark/resnet_ctl_imagenet_benchmark.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes CTL benchmarks and accuracy tests."""
# pylint: disable=line-too-long,g-bad-import-order
from
__future__
import
print_function
import
os
import
time
from
absl
import
flags
import
tensorflow
as
tf
from
official.benchmark
import
owner_utils
from
official.vision.image_classification.resnet
import
common
from
official.vision.image_classification.resnet
import
resnet_ctl_imagenet_main
from
official.benchmark.perfzero_benchmark
import
PerfZeroBenchmark
from
official.benchmark
import
benchmark_wrappers
from
official.utils.flags
import
core
as
flags_core
# Accuracy gates: ResNet50 top-1 eval accuracy after full training must land
# inside [MIN_TOP_1_ACCURACY, MAX_TOP_1_ACCURACY] for the run to pass.
MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77

FLAGS = flags.FLAGS
class CtlBenchmark(PerfZeroBenchmark):
  """Base benchmark class with methods to simplify testing."""

  def __init__(self, output_dir=None, default_flags=None, flag_methods=None):
    # Normalize to empty containers so downstream iteration is always safe.
    self.default_flags = default_flags or {}
    self.flag_methods = flag_methods or {}
    super(CtlBenchmark, self).__init__(
        output_dir=output_dir,
        default_flags=self.default_flags,
        flag_methods=self.flag_methods)

  def _report_benchmark(self,
                        stats,
                        wall_time_sec,
                        top_1_max=None,
                        top_1_min=None,
                        total_batch_size=None,
                        log_steps=None,
                        warmup=1,
                        start_time_sec=None):
    """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from keras models with known entries.
      wall_time_sec: the during of the benchmark execution in seconds
      top_1_max: highest passing level for top_1 accuracy.
      top_1_min: lowest passing level for top_1 accuracy.
      total_batch_size: Global batch-size.
      log_steps: How often the log was created for stats['step_timestamp_log'].
      warmup: number of entries in stats['step_timestamp_log'] to ignore.
      start_time_sec: the start time of the program in seconds since epoch.
    """
    metrics = []
    # Accuracy metrics are only present when evaluation ran (eval not skipped).
    if 'eval_acc' in stats:
      metrics.append({'name': 'accuracy_top_1',
                      'value': stats['eval_acc'],
                      'min_value': top_1_min,
                      'max_value': top_1_max})
      metrics.append({'name': 'eval_loss',
                      'value': stats['eval_loss']})
      metrics.append({'name': 'top_1_train_accuracy',
                      'value': stats['train_acc']})
      metrics.append({'name': 'train_loss',
                      'value': stats['train_loss']})

    # Throughput is only computed when there are enough logged steps to
    # discard the warmup entries and still have an interval to measure.
    if (warmup and 'step_timestamp_log' in stats and
        len(stats['step_timestamp_log']) > warmup + 1):
      # first entry in the time_log is start of step 0. The rest of the
      # entries are the end of each step recorded
      time_log = stats['step_timestamp_log']
      steps_elapsed = time_log[-1].batch_index - time_log[warmup].batch_index
      time_elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
      examples_per_sec = total_batch_size * (steps_elapsed / time_elapsed)
      metrics.append({'name': 'exp_per_second',
                      'value': examples_per_sec})

    if 'avg_exp_per_second' in stats:
      metrics.append({'name': 'avg_exp_per_second',
                      'value': stats['avg_exp_per_second']})

    if start_time_sec and 'step_timestamp_log' in stats:
      time_log = stats['step_timestamp_log']
      # time_log[0] is recorded at the beginning of the first step.
      startup_time = time_log[0].timestamp - start_time_sec
      metrics.append({'name': 'startup_time', 'value': startup_time})

    # Record any flags that differ from their defaults alongside the metrics.
    flags_str = flags_core.get_nondefault_flags_as_str()
    self.report_benchmark(
        iters=-1,
        wall_time=wall_time_sec,
        metrics=metrics,
        extras={'flags': flags_str})
class Resnet50CtlAccuracy(CtlBenchmark):
  """Benchmark accuracy tests for ResNet50 in CTL."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more named
        arguments before updating the constructor.
    """
    flag_methods = [common.define_keras_flags]

    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50CtlAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8  # global batch: 128 per GPU.
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Test Keras model with eager, 8 GPUs with tf.keras mixed precision."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8  # fp16 halves memory, allowing 2x batch.
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark()

  def benchmark_8_gpu_amp(self):
    """Test Keras model with 8 GPUs and mixed precision via graph rewrite."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    # Wall time covers the full training-plus-eval run.
    start_time_sec = time.time()
    stats = resnet_ctl_imagenet_main.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50CtlAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=MIN_TOP_1_ACCURACY,
        top_1_max=MAX_TOP_1_ACCURACY,
        total_batch_size=FLAGS.batch_size,
        log_steps=100,
        start_time_sec=start_time_sec)
class Resnet50CtlBenchmarkBase(CtlBenchmark):
  """Resnet50 benchmarks."""

  def __init__(self, output_dir=None, default_flags=None):
    flag_methods = [common.define_keras_flags]

    super(Resnet50CtlBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = resnet_ctl_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec
    # Warmup means the number of logged step time entries that are excluded in
    # performance report. Default to exclude 1 FLAGS.log_steps time.
    super(Resnet50CtlBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps,
        warmup=1,
        start_time_sec=start_time_sec)

  def benchmark_1_gpu_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test Keras model with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU with tf.keras mixed precision."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_amp(self):
    """Test Keras model with 1 GPU with automatic mixed precision."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_amp')
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_amp(self):
    """Test Keras model with XLA and 1 GPU with automatic mixed precision."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_amp')
    FLAGS.batch_size = 256
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_eager(self):
    """Test Keras model with 1 GPU in pure eager mode."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_eager')
    FLAGS.batch_size = 120
    # Disable tf.function compilation to measure pure eager execution.
    FLAGS.use_tf_function = False
    FLAGS.use_tf_while_loop = False
    FLAGS.single_l2_loss_op = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_eager(self):
    """Test Keras model with 1 GPU with fp16 and pure eager mode."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_eager')
    FLAGS.batch_size = 240
    FLAGS.dtype = 'fp16'
    FLAGS.use_tf_function = False
    FLAGS.use_tf_while_loop = False
    FLAGS.single_l2_loss_op = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test Keras model with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Test Keras model with 8 GPUs with tf.keras mixed precision."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.dtype = 'fp16'
    self._run_and_report_benchmark()

  def benchmark_8_gpu_eager(self):
    """Test Keras model with 8 GPUs, eager, fp32."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.use_tf_function = False
    FLAGS.use_tf_while_loop = False
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_eager')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_8_gpu_eager_fp16(self):
    """Test Keras model with 8 GPUs, eager, fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.use_tf_function = False
    FLAGS.use_tf_while_loop = False
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_eager_fp16')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_8_gpu_amp(self):
    """Test Keras model with 8 GPUs with automatic mixed precision."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_amp(self):
    """Test Keras model with XLA and 8 GPUs with automatic mixed precision."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def _set_df_common(self):
    # Shared flag configuration for the TPU (DragonFish) benchmarks below.
    FLAGS.steps_per_loop = 500
    FLAGS.train_epochs = 2
    FLAGS.train_steps = None
    FLAGS.skip_eval = True
    FLAGS.enable_eager = True
    FLAGS.enable_tensorboard = False
    FLAGS.distribution_strategy = 'tpu'
    FLAGS.report_accuracy_metrics = False
    FLAGS.log_steps = 50
    FLAGS.single_l2_loss_op = True
    FLAGS.use_tf_function = True
    FLAGS.enable_checkpoint_and_export = False

  def benchmark_2x2_tpu_bf16(self):
    self._setup()
    self._set_df_common()
    FLAGS.batch_size = 1024
    FLAGS.dtype = 'bf16'
    self._run_and_report_benchmark()

  def benchmark_4x4_tpu_bf16(self):
    self._setup()
    self._set_df_common()
    FLAGS.batch_size = 4096
    FLAGS.dtype = 'bf16'
    self._run_and_report_benchmark()

  @owner_utils.Owner('tf-graph-compiler')
  def benchmark_4x4_tpu_bf16_mlir(self):
    """Run resnet model on 4x4 with the MLIR Bridge enabled."""
    self._setup()
    self._set_df_common()
    FLAGS.batch_size = 4096
    FLAGS.dtype = 'bf16'
    tf.config.experimental.enable_mlir_bridge()
    self._run_and_report_benchmark()

  def benchmark_8x16_tpu_bf16(self):
    self._setup()
    self._set_df_common()
    FLAGS.batch_size = 8192
    FLAGS.dtype = 'bf16'
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    super(Resnet50CtlBenchmarkBase, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)
class Resnet50CtlBenchmarkSynth(Resnet50CtlBenchmarkBase):
  """Resnet50 synthetic benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    # Short synthetic-data run: no eval, a fixed small number of steps.
    def_flags = {
        'skip_eval': True,
        'use_synthetic_data': True,
        'train_steps': 110,
        'steps_per_loop': 20,
        'log_steps': 10,
    }

    super(Resnet50CtlBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=def_flags)
class Resnet50CtlBenchmarkReal(Resnet50CtlBenchmarkBase):
  """Resnet50 real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    # Short real-data run: ImageNet under root_data_dir, no eval.
    def_flags = {
        'skip_eval': True,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'train_steps': 110,
        'steps_per_loop': 20,
        'log_steps': 10,
    }

    super(Resnet50CtlBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=def_flags)
# Script entry point: run the benchmarks under TensorFlow's test runner.
if __name__ == '__main__':
  tf.test.main()
official/benchmark/retinanet_benchmark.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes RetinaNet benchmarks and accuracy tests."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
# pylint: disable=g-bad-import-order
import
json
import
time
from
absl
import
flags
from
absl.testing
import
flagsaver
import
tensorflow
as
tf
# pylint: enable=g-bad-import-order
from
official.benchmark
import
benchmark_wrappers
from
official.benchmark
import
perfzero_benchmark
from
official.utils.flags
import
core
as
flags_core
from
official.utils.misc
import
keras_utils
from
official.vision.detection
import
main
as
detection
from
official.vision.detection.configs
import
base_config
FLAGS = flags.FLAGS

# GCS locations of the COCO dataset shards, eval annotations, and the
# pretrained ResNet backbone checkpoint used to initialize RetinaNet.
# pylint: disable=line-too-long
COCO_TRAIN_DATA = 'gs://tf-perfzero-data/coco/train*'
COCO_EVAL_DATA = 'gs://tf-perfzero-data/coco/val*'
COCO_EVAL_JSON = 'gs://tf-perfzero-data/coco/instances_val2017.json'
RESNET_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07'
# pylint: enable=line-too-long
class DetectionBenchmarkBase(perfzero_benchmark.PerfZeroBenchmark):
  """Base class to hold methods common to test classes."""

  def __init__(self, **kwargs):
    super(DetectionBenchmarkBase, self).__init__(**kwargs)
    # TimeHistory-style callback; set by subclasses before running.
    self.timer_callback = None

  def _report_benchmark(self, stats, start_time_sec, wall_time_sec, min_ap,
                        max_ap, warmup):
    """Report benchmark results by writing to local protobuf file.

    Args:
      stats: dict returned from Detection models with known entries.
      start_time_sec: the start of the benchmark execution in seconds
      wall_time_sec: the duration of the benchmark execution in seconds
      min_ap: Minimum detection AP constraint to verify correctness of the
        model.
      max_ap: Maximum detection AP accuracy constraint to verify correctness of
        the model.
      warmup: Number of time log entries to ignore when computing examples/sec.
    """
    metrics = [{
        'name': 'total_loss',
        'value': stats['total_loss'],
    }]
    if self.timer_callback:
      metrics.append({
          'name': 'exp_per_second',
          'value': self.timer_callback.get_examples_per_sec(warmup)
      })
      metrics.append({
          'name': 'startup_time',
          'value': self.timer_callback.get_startup_time(start_time_sec)
      })
    else:
      # No timer was installed; report zero throughput rather than failing.
      metrics.append({
          'name': 'exp_per_second',
          'value': 0.0,
      })

    # AP is only available when an eval pass ran.
    if 'eval_metrics' in stats:
      metrics.append({
          'name': 'AP',
          'value': stats['AP'],
          'min_value': min_ap,
          'max_value': max_ap,
      })

    flags_str = flags_core.get_nondefault_flags_as_str()
    self.report_benchmark(
        iters=stats['total_steps'],
        wall_time=wall_time_sec,
        metrics=metrics,
        extras={'flags': flags_str})
class RetinanetBenchmarkBase(DetectionBenchmarkBase):
  """Base class to hold methods common to test classes in the module."""

  def __init__(self, **kwargs):
    # Default dataset/checkpoint locations; subclasses read these in _params.
    self.train_data_path = COCO_TRAIN_DATA
    self.eval_data_path = COCO_EVAL_DATA
    self.eval_json_path = COCO_EVAL_JSON
    self.resnet_checkpoint_path = RESNET_CHECKPOINT_PATH

    super(RetinanetBenchmarkBase, self).__init__(**kwargs)

  def _run_detection_main(self):
    """Starts detection job."""
    if self.timer_callback:
      FLAGS.log_steps = 0  # prevent detection.run from adding the same callback
      return detection.run(callbacks=[self.timer_callback])
    else:
      return detection.run()
class RetinanetAccuracy(RetinanetBenchmarkBase):
  """Accuracy test for RetinaNet model.

  Tests RetinaNet detection task model accuracy. The naming
  convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` format.
  """

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                params,
                                min_ap=0.325,
                                max_ap=0.35,
                                do_eval=True,
                                warmup=1):
    """Starts RetinaNet accuracy benchmark test."""
    # Model params are passed to the trainer as a JSON flag override.
    FLAGS.params_override = json.dumps(params)
    # Need timer callback to measure performance
    self.timer_callback = keras_utils.TimeHistory(
        batch_size=params['train']['batch_size'],
        log_steps=FLAGS.log_steps,
    )

    start_time_sec = time.time()
    FLAGS.mode = 'train'
    summary, _ = self._run_detection_main()
    wall_time_sec = time.time() - start_time_sec

    if do_eval:
      FLAGS.mode = 'eval'
      eval_metrics = self._run_detection_main()
      summary.update(eval_metrics)

    summary['total_steps'] = params['train']['total_steps']
    self._report_benchmark(summary, start_time_sec, wall_time_sec, min_ap,
                           max_ap, warmup)

  def _setup(self):
    super(RetinanetAccuracy, self)._setup()
    FLAGS.model = 'retinanet'

  def _params(self):
    # Baseline parameter dict; benchmark methods mutate copies of this.
    return {
        'architecture': {
            'use_bfloat16': True,
        },
        'train': {
            'batch_size': 64,
            'iterations_per_loop': 100,
            'total_steps': 22500,
            'train_file_pattern': self.train_data_path,
            'checkpoint': {
                'path': self.resnet_checkpoint_path,
                'prefix': 'resnet50/'
            },
            # Speed up ResNet training when loading from the checkpoint.
            'frozen_variable_prefix': base_config.RESNET_FROZEN_VAR_PREFIX,
        },
        'eval': {
            'batch_size': 8,
            'eval_samples': 5000,
            'val_json_file': self.eval_json_path,
            'eval_file_pattern': self.eval_data_path,
        },
    }

  @flagsaver.flagsaver
  def benchmark_8_gpu_coco(self):
    """Run RetinaNet model accuracy test with 8 GPUs."""
    self._setup()
    params = self._params()
    FLAGS.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_coco')
    FLAGS.strategy_type = 'mirrored'
    self._run_and_report_benchmark(params)
class RetinanetBenchmarkReal(RetinanetAccuracy):
  """Short benchmark performance tests for RetinaNet model.

  Tests RetinaNet performance in different GPU configurations.
  The naming convention of below test cases follow
  `benchmark_(number of gpus)_gpu` format.
  """

  def _setup(self):
    super(RetinanetBenchmarkReal, self)._setup()
    # Use negative value to avoid saving checkpoints.
    FLAGS.save_checkpoint_freq = -1

  @flagsaver.flagsaver
  def benchmark_8_gpu_coco(self):
    """Run RetinaNet model accuracy test with 8 GPUs."""
    self._setup()
    params = self._params()
    params['architecture']['use_bfloat16'] = False
    params['train']['total_steps'] = 1875  # One epoch.
    # The iterations_per_loop must be one, otherwise the number of examples per
    # second would be wrong. Currently only support calling callback per batch
    # when each loop only runs on one batch, i.e. host loop for one step. The
    # performance of this situation might be lower than the case of
    # iterations_per_loop > 1.
    # Related bug: b/135933080
    params['train']['iterations_per_loop'] = 1
    params['eval']['eval_samples'] = 8
    FLAGS.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('real_benchmark_8_gpu_coco')
    FLAGS.strategy_type = 'mirrored'
    self._run_and_report_benchmark(params)

  @flagsaver.flagsaver
  def benchmark_1_gpu_coco(self):
    """Run RetinaNet model accuracy test with 1 GPU."""
    self._setup()
    params = self._params()
    params['architecture']['use_bfloat16'] = False
    params['train']['batch_size'] = 8
    params['train']['total_steps'] = 200
    params['train']['iterations_per_loop'] = 1
    params['eval']['eval_samples'] = 8
    FLAGS.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('real_benchmark_1_gpu_coco')
    FLAGS.strategy_type = 'one_device'
    self._run_and_report_benchmark(params)

  @flagsaver.flagsaver
  def benchmark_xla_1_gpu_coco(self):
    """Run RetinaNet model accuracy test with 1 GPU and XLA enabled."""
    self._setup()
    params = self._params()
    params['architecture']['use_bfloat16'] = False
    params['train']['batch_size'] = 8
    params['train']['total_steps'] = 200
    params['train']['iterations_per_loop'] = 1
    params['eval']['eval_samples'] = 8
    FLAGS.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('real_benchmark_xla_1_gpu_coco')
    FLAGS.strategy_type = 'one_device'
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(params)

  @flagsaver.flagsaver
  def benchmark_2x2_tpu_coco(self):
    """Run RetinaNet model accuracy test with 4 TPUs."""
    self._setup()
    params = self._params()
    params['train']['batch_size'] = 64
    params['train']['total_steps'] = 1875  # One epoch.
    params['train']['iterations_per_loop'] = 500
    FLAGS.model_dir = self._get_model_dir('real_benchmark_2x2_tpu_coco')
    FLAGS.strategy_type = 'tpu'
    # Performance-only run: skip eval and count all steps (warmup=0).
    self._run_and_report_benchmark(params, do_eval=False, warmup=0)

  @flagsaver.flagsaver
  def benchmark_2x2_tpu_spinenet_coco(self):
    """Run SpineNet with RetinaNet model accuracy test with 4 TPUs."""
    self._setup()
    params = self._params()
    # Swap the ResNet backbone for SpineNet; no pretrained checkpoint.
    params['architecture']['backbone'] = 'spinenet'
    params['architecture']['multilevel_features'] = 'identity'
    params['architecture']['use_bfloat16'] = False
    params['train']['batch_size'] = 64
    params['train']['total_steps'] = 1875  # One epoch.
    params['train']['iterations_per_loop'] = 500
    params['train']['checkpoint']['path'] = ''
    FLAGS.model_dir = self._get_model_dir(
        'real_benchmark_2x2_tpu_spinenet_coco')
    FLAGS.strategy_type = 'tpu'
    self._run_and_report_benchmark(params, do_eval=False, warmup=0)
# Script entry point: run the benchmarks under TensorFlow's test runner.
if __name__ == '__main__':
  tf.test.main()
official/benchmark/shakespeare_benchmark.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Shakespeare (LSTM) benchmark and accuracy tests."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
time
from
absl
import
flags
import
tensorflow
as
tf
# pylint: disable=g-bad-import-order
from
official.benchmark.models.shakespeare
import
shakespeare_main
from
official.utils.flags
import
core
as
flags_core
from
official.utils.misc
import
keras_utils
from
official.benchmark
import
benchmark_wrappers
from
official.benchmark.perfzero_benchmark
import
PerfZeroBenchmark
# Relative path (under the root data dir) of the Shakespeare training text.
SHAKESPEARE_TRAIN_DATA = 'shakespeare/shakespeare.txt'
# Scratch directory from the environment; may be None if TMPDIR is unset.
TMP_DIR = os.getenv('TMPDIR')

FLAGS = flags.FLAGS
class
ShakespeareBenchmarkBase
(
PerfZeroBenchmark
):
"""Base class for Shakespeare (LSTM) benchmark and accuracy tests."""
  def __init__(self, output_dir=None, default_flags=None, root_data_dir=None):
    # root_data_dir is accepted for PerfZero forward-compatibility but is not
    # forwarded; flag defaults come from shakespeare_main.define_flags.
    super(ShakespeareBenchmarkBase, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        flag_methods=[shakespeare_main.define_flags])
@
benchmark_wrappers
.
enable_runtime_flags
def
_run_and_report_benchmark
(
self
,
top_1_train_min
=
0.91
,
top_1_train_max
=
0.94
,
warmup
=
1
,
log_steps
=
100
):
"""Report benchmark results by writing to local protobuf file.
Average epoch time is calculated by skipping the first epoch. This average
ignores time spent between epoch and is recorded by begin and end epoch. To
skip accuracy check set `top_1_train_min=None`.
Args:
top_1_train_min: lowest passing value.
top_1_train_max: highest passing value.
warmup: number of entries in `timestamp_log` to ignore.
log_steps: How often the log was created for `timestamp_log`.
"""
total_batch_size
=
FLAGS
.
batch_size
metrics
=
[]
start_time_sec
=
time
.
time
()
stats
=
shakespeare_main
.
run
(
FLAGS
)
wall_time_sec
=
time
.
time
()
-
start_time_sec
if
top_1_train_min
:
metrics
.
append
({
'name'
:
'accuracy_top_1_train'
,
'value'
:
stats
[
'history'
][
'RecallAt1'
][
-
1
],
'min_value'
:
top_1_train_min
,
'max_value'
:
top_1_train_max
})
# Look for the time history callback which was used during keras.fit
for
callback
in
stats
[
'callbacks'
]:
if
isinstance
(
callback
,
keras_utils
.
TimeHistory
):
epoch_timings
=
callback
.
epoch_runtime_log
if
len
(
epoch_timings
)
>
1
:
average_time
=
sum
(
epoch_timings
[
1
:])
/
len
(
epoch_timings
[
1
:])
metrics
.
append
({
'name'
:
'avg_epoch_time'
,
'value'
:
average_time
})
# First entry in timestamp_log is the start of step 1. The rest of the
# entries are the end of each step recorded.
time_log
=
callback
.
timestamp_log
elapsed
=
time_log
[
-
1
].
timestamp
-
time_log
[
warmup
].
timestamp
num_examples
=
(
total_batch_size
*
log_steps
*
(
len
(
time_log
)
-
warmup
-
1
))
if
elapsed
>
0
:
examples_per_sec
=
num_examples
/
elapsed
metrics
.
append
({
'name'
:
'exp_per_second'
,
'value'
:
examples_per_sec
})
flags_str
=
flags_core
.
get_nondefault_flags_as_str
()
self
.
report_benchmark
(
iters
=-
1
,
wall_time
=
wall_time_sec
,
metrics
=
metrics
,
extras
=
{
'flags'
:
flags_str
})
class ShakespeareAccuracy(ShakespeareBenchmarkBase):
  """Shakespeare accuracy tests.

  This is not an ideal test. The best we can use for the accuracy check is to
  validate top_1 of the training set. At batch size 64 the top_1 training
  stabilizes to ~0.92 around 40-45 epochs.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Shakespeare accuracy tests.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)
    super(ShakespeareAccuracy, self).__init__(
        output_dir=output_dir, root_data_dir=root_data_dir)

  def _apply_common_flags(self, num_gpus):
    """Sets the flag values shared by every accuracy benchmark run."""
    FLAGS.num_gpus = num_gpus
    FLAGS.training_data = self.train_data
    FLAGS.batch_size = 64
    FLAGS.train_epochs = 43
    FLAGS.model_dir = ''

  def benchmark_cpu(self):
    """Benchmark cpu."""
    self._setup()
    self._apply_common_flags(num_gpus=0)
    self._run_and_report_benchmark()

  def benchmark_cpu_no_ds_run_eagerly(self):
    """Benchmark cpu without distribution strategies and run eagerly."""
    self._setup()
    self._apply_common_flags(num_gpus=0)
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Benchmark 1 gpu."""
    self._setup()
    self._apply_common_flags(num_gpus=1)
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_ds(self):
    """Benchmark 1 gpu without distribution strategies."""
    self._setup()
    self._apply_common_flags(num_gpus=1)
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_ds_run_eagerly(self):
    """Benchmark 1 gpu without distribution strategies and run eagerly."""
    self._setup()
    self._apply_common_flags(num_gpus=1)
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Benchmark 1 gpu w/xla."""
    self._setup()
    self._apply_common_flags(num_gpus=1)
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Benchmark 8 gpu.

    This test is for accuracy, not scaling. The batch-size is not scaled to
    the number of gpus.
    """
    self._setup()
    self._apply_common_flags(num_gpus=8)
    self._run_and_report_benchmark()
class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
  """Shakespeare performance benchmarks against real data.

  These runs skip the accuracy gate (`top_1_train_min=None`); they measure
  throughput and wall time only.
  """

  def __init__(self, output_dir=None, root_data_dir=TMP_DIR, **kwargs):
    """Benchmark tests w/Keras.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)
    # Defaults applied to every benchmark method; individual methods
    # override batch_size / log_steps / strategy flags below.
    def_flags = {}
    def_flags['training_data'] = self.train_data
    def_flags['model_dir'] = ''
    def_flags['train_epochs'] = 4
    def_flags['log_steps'] = 50

    super(ShakespeareKerasBenchmarkReal, self).__init__(
        output_dir=output_dir,
        root_data_dir=root_data_dir,
        default_flags=def_flags)

  def benchmark_cpu(self):
    """Benchmark cpu."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()

  def benchmark_cpu_no_ds_run_eagerly(self):
    """Benchmark cpu without distribution strategy and run eagerly."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.batch_size = 64
    FLAGS.distribution_strategy = 'off'
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_cpu_no_ds(self):
    """Benchmark cpu without distribution strategy."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.batch_size = 64
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_ds_force_v2(self):
    """Benchmark cpu no ds, and force v2."""
    # NOTE(review): this body is identical to benchmark_cpu_no_ds — no flag
    # forces v2 here. Presumably a force-v2 flag was intended; confirm
    # against shakespeare_main's flag definitions before relying on it.
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.batch_size = 64
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Benchmark 1 gpu."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_cudnn(self):
    """Benchmark 1 gpu with CuDNN disabled."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.cudnn = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_ds(self):
    """Benchmark 1 gpu without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_ds_run_eagerly(self):
    """Benchmark 1 gpu without distribution strategies and run eagerly."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Benchmark 1 gpu w/xla."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_no_cudnn(self):
    """Benchmark 1 gpu w/xla and CuDNN disabled."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 64
    FLAGS.cudnn = False
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Benchmark 8 gpu."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 64 * 8
    FLAGS.log_steps = 10
    self._run_and_report_benchmark()

  def benchmark_8_gpu_no_cudnn(self):
    """Benchmark 8 gpu with CuDNN disabled."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 64 * 8
    FLAGS.log_steps = 10
    FLAGS.cudnn = False
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu(self):
    """Benchmark 8 gpu w/xla."""
    self._setup()
    # Bug fix: previously set num_gpus = 1, contradicting the method name,
    # docstring, 8x batch size, and the sibling 8-gpu benchmarks.
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 64 * 8
    FLAGS.log_steps = 10
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_no_cudnn(self):
    """Benchmark 8 gpu w/xla and CuDNN disabled."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 64 * 8
    FLAGS.log_steps = 10
    FLAGS.cudnn = False
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def _run_and_report_benchmark(self):
    """Run and report benchmark, skipping the accuracy check."""
    super(ShakespeareKerasBenchmarkReal, self)._run_and_report_benchmark(
        top_1_train_min=None, log_steps=FLAGS.log_steps)
# Run the benchmark classes through the TensorFlow test runner when this
# module is executed directly.
if __name__ == '__main__':
  tf.test.main()
official/benchmark/tfhub_memory_usage_benchmark.py
deleted
100644 → 0
View file @
8e9296ff
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a memory usage benchmark for a Tensorflow Hub model.
Loads a SavedModel and records memory usage.
"""
import
functools
import
time
from
absl
import
flags
import
tensorflow
as
tf
import
tensorflow_hub
as
hub
from
official.benchmark.perfzero_benchmark
import
PerfZeroBenchmark
# Module-level handle to the absl command-line flags.
FLAGS = flags.FLAGS
class TfHubMemoryUsageBenchmark(PerfZeroBenchmark):
  """A benchmark measuring memory usage for a given TF Hub SavedModel."""

  def __init__(self,
               hub_model_handle_list=None,
               output_dir=None,
               default_flags=None,
               root_data_dir=None,
               **kwargs):
    super(TfHubMemoryUsageBenchmark, self).__init__(
        output_dir=output_dir, default_flags=default_flags, **kwargs)
    # Register one benchmark_<name> attribute per semicolon-separated
    # handle so each model shows up as its own discoverable benchmark.
    if hub_model_handle_list:
      for handle in hub_model_handle_list.split(';'):
        # Converts a model handle of the form
        # https://tfhub.dev/google/nnlm-en-dim128/1 to valid python method name
        # like google_nnlm_en_dim128_1.
        method_name = (
            handle.replace('https://tfhub.dev', '')
            .replace('/', '_')
            .replace('-', '_')
            .strip('_'))
        setattr(self, 'benchmark_' + method_name,
                functools.partial(self.benchmark_memory_usage, handle))

  def benchmark_memory_usage(
      self, hub_model_handle='https://tfhub.dev/google/nnlm-en-dim128/1'):
    """Loads the model and reports the elapsed wall time.

    Only load time is measured here; memory is presumably captured by the
    surrounding PerfZero harness — confirm against the runner's setup.
    """
    started = time.time()
    self.load_model(hub_model_handle)
    elapsed = time.time() - started
    self.report_benchmark(iters=-1, wall_time=elapsed, metrics=[])

  def load_model(self, hub_model_handle):
    """Loads a TF Hub module."""
    hub.load(hub_model_handle)
# Run the benchmark class through the TensorFlow test runner when this
# module is executed directly.
if __name__ == '__main__':
  tf.test.main()
Prev
1
2
3
4
5
6
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment