Commit 9485aa1d authored by qianyj

Update code to v2.8.0

parents 89cfa348 f5fc733a
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with Cifar data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tempfile
import tensorflow as tf
from tensorflow.python.eager import context
from tensorflow.python.platform import googletest
from official.benchmark.models import cifar_preprocessing
from official.benchmark.models import resnet_cifar_main
from official.utils.testing import integration
class KerasCifarTest(googletest.TestCase):
"""Unit tests for Keras ResNet with Cifar."""
_extra_flags = [
"-batch_size", "4", "-train_steps", "1", "-use_synthetic_data", "true"
]
_tempdir = None
def get_temp_dir(self):
if not self._tempdir:
self._tempdir = tempfile.mkdtemp(dir=googletest.GetTempDir())
return self._tempdir
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(KerasCifarTest, cls).setUpClass()
resnet_cifar_main.define_cifar_flags()
def setUp(self):
super(KerasCifarTest, self).setUp()
cifar_preprocessing.NUM_IMAGES["validation"] = 4
def tearDown(self):
super(KerasCifarTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
def test_end_to_end_no_dist_strat(self):
"""Test Keras model with 1 GPU, no distribution strategy."""
extra_flags = [
"-distribution_strategy",
"off",
"-model_dir",
"keras_cifar_no_dist_strat",
"-data_format",
"channels_last",
]
extra_flags = extra_flags + self._extra_flags
integration.run_synthetic(
main=resnet_cifar_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_graph_no_dist_strat(self):
"""Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
extra_flags = [
"-enable_eager",
"false",
"-distribution_strategy",
"off",
"-model_dir",
"keras_cifar_graph_no_dist_strat",
"-data_format",
"channels_last",
]
extra_flags = extra_flags + self._extra_flags
integration.run_synthetic(
main=resnet_cifar_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_1_gpu(self):
"""Test Keras model with 1 GPU."""
if context.num_gpus() < 1:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(1, context.num_gpus()))
extra_flags = [
"-num_gpus",
"1",
"-distribution_strategy",
"mirrored",
"-model_dir",
"keras_cifar_1_gpu",
"-data_format",
"channels_last",
]
extra_flags = extra_flags + self._extra_flags
integration.run_synthetic(
main=resnet_cifar_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_graph_1_gpu(self):
"""Test Keras model in legacy graph mode with 1 GPU."""
if context.num_gpus() < 1:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(1, context.num_gpus()))
extra_flags = [
"-num_gpus",
"1",
"-noenable_eager",
"-distribution_strategy",
"mirrored",
"-model_dir",
"keras_cifar_graph_1_gpu",
"-data_format",
"channels_last",
]
extra_flags = extra_flags + self._extra_flags
integration.run_synthetic(
main=resnet_cifar_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_2_gpu(self):
"""Test Keras model with 2 GPUs."""
if context.num_gpus() < 2:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(2, context.num_gpus()))
extra_flags = [
"-num_gpus",
"2",
"-distribution_strategy",
"mirrored",
"-model_dir",
"keras_cifar_2_gpu",
]
extra_flags = extra_flags + self._extra_flags
integration.run_synthetic(
main=resnet_cifar_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_graph_2_gpu(self):
"""Test Keras model in legacy graph mode with 2 GPUs."""
if context.num_gpus() < 2:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(2, context.num_gpus()))
extra_flags = [
"-num_gpus",
"2",
"-enable_eager",
"false",
"-distribution_strategy",
"mirrored",
"-model_dir",
"keras_cifar_graph_2_gpu",
]
extra_flags = extra_flags + self._extra_flags
integration.run_synthetic(
main=resnet_cifar_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
if __name__ == "__main__":
googletest.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""
import os
# Import libraries
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
from official.common import distribute_utils
from official.legacy.image_classification import test_utils
from official.legacy.image_classification.resnet import common
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.legacy.image_classification.resnet import resnet_model
from official.modeling import performance
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
from official.utils.misc import model_helpers
def _cluster_last_three_conv2d_layers(model):
"""Helper method to cluster last three conv2d layers."""
import tensorflow_model_optimization as tfmot # pylint: disable=g-import-not-at-top
last_three_conv2d_layers = [
layer for layer in model.layers
if isinstance(layer, tf.keras.layers.Conv2D)
][-3:]
cluster_weights = tfmot.clustering.keras.cluster_weights
centroid_initialization = tfmot.clustering.keras.CentroidInitialization
def cluster_fn(layer):
if layer not in last_three_conv2d_layers:
return layer
    # The first two of the selected layers are clustered with 256 clusters;
    # the last one with 32.
    if layer in last_three_conv2d_layers[:2]:
      number_of_clusters = 256
    else:
      number_of_clusters = 32
    clustered = cluster_weights(
        layer,
        number_of_clusters=number_of_clusters,
        cluster_centroids_init=centroid_initialization.LINEAR)
    print('Clustered {} with {} clusters'.format(layer.name,
                                                 number_of_clusters))
    return clustered
return tf.keras.models.clone_model(model, clone_function=cluster_fn)
def run(flags_obj):
"""Run ResNet ImageNet training and eval loop using native Keras APIs.
Args:
flags_obj: An object containing parsed flag values.
Raises:
ValueError: If fp16 is passed as it is not currently supported.
NotImplementedError: If some features are not currently supported.
Returns:
Dictionary of training and eval stats.
"""
keras_utils.set_session_config(
enable_xla=flags_obj.enable_xla)
# Execute flag override logic for better model performance
if flags_obj.tf_gpu_thread_mode:
keras_utils.set_gpu_thread_mode_and_count(
per_gpu_thread_count=flags_obj.per_gpu_thread_count,
gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
num_gpus=flags_obj.num_gpus,
datasets_num_private_threads=flags_obj.datasets_num_private_threads)
common.set_cudnn_batchnorm_mode()
dtype = flags_core.get_tf_dtype(flags_obj)
performance.set_mixed_precision_policy(
flags_core.get_tf_dtype(flags_obj))
data_format = flags_obj.data_format
if data_format is None:
data_format = ('channels_first' if tf.config.list_physical_devices('GPU')
else 'channels_last')
tf.keras.backend.set_image_data_format(data_format)
# Configures cluster spec for distribution strategy.
_ = distribute_utils.configure_cluster(flags_obj.worker_hosts,
flags_obj.task_index)
strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=flags_obj.distribution_strategy,
num_gpus=flags_obj.num_gpus,
all_reduce_alg=flags_obj.all_reduce_alg,
num_packs=flags_obj.num_packs,
tpu_address=flags_obj.tpu)
if strategy:
    # flags_obj.enable_get_next_as_optional controls whether to enable the
    # get_next_as_optional behavior in DistributedIterator. If true, the last
    # partial batch can be supported.
strategy.extended.experimental_enable_get_next_as_optional = (
flags_obj.enable_get_next_as_optional
)
strategy_scope = distribute_utils.get_strategy_scope(strategy)
# pylint: disable=protected-access
if flags_obj.use_synthetic_data:
input_fn = common.get_synth_input_fn(
height=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
width=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
num_channels=imagenet_preprocessing.NUM_CHANNELS,
num_classes=imagenet_preprocessing.NUM_CLASSES,
dtype=dtype,
drop_remainder=True)
else:
input_fn = imagenet_preprocessing.input_fn
# When `enable_xla` is True, we always drop the remainder of the batches
# in the dataset, as XLA-GPU doesn't support dynamic shapes.
drop_remainder = flags_obj.enable_xla
  # The current resnet_model.resnet50 input format is always channels-last,
  # whereas the keras_applications MobileNet model's input format depends on
  # the Keras backend image data format. The use_keras_image_data_format flag
  # indicates whether the image preprocessor output format should match the
  # Keras backend image data format or stay channels-last.
  use_keras_image_data_format = flags_obj.model in ('mobilenet',
                                                    'mobilenet_pretrained')
train_input_dataset = input_fn(
is_training=True,
data_dir=flags_obj.data_dir,
batch_size=flags_obj.batch_size,
parse_record_fn=imagenet_preprocessing.get_parse_record_fn(
use_keras_image_data_format=use_keras_image_data_format),
datasets_num_private_threads=flags_obj.datasets_num_private_threads,
dtype=dtype,
drop_remainder=drop_remainder,
tf_data_experimental_slack=flags_obj.tf_data_experimental_slack,
training_dataset_cache=flags_obj.training_dataset_cache,
)
eval_input_dataset = None
if not flags_obj.skip_eval:
eval_input_dataset = input_fn(
is_training=False,
data_dir=flags_obj.data_dir,
batch_size=flags_obj.batch_size,
parse_record_fn=imagenet_preprocessing.get_parse_record_fn(
use_keras_image_data_format=use_keras_image_data_format),
dtype=dtype,
drop_remainder=drop_remainder)
lr_schedule = common.PiecewiseConstantDecayWithWarmup(
batch_size=flags_obj.batch_size,
epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
warmup_epochs=common.LR_SCHEDULE[0][1],
boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
multipliers=list(p[0] for p in common.LR_SCHEDULE),
compute_lr_on_cpu=True)
steps_per_epoch = (
imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
with strategy_scope:
if flags_obj.optimizer == 'resnet50_default':
optimizer = common.get_optimizer(lr_schedule)
    elif flags_obj.optimizer in ('mobilenet_default', 'mobilenet_fine_tune'):
      initial_learning_rate = (
          flags_obj.initial_learning_rate_per_sample * flags_obj.batch_size)
      if flags_obj.optimizer == 'mobilenet_fine_tune':
        initial_learning_rate = 1e-5
optimizer = tf.keras.optimizers.SGD(
learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate,
decay_steps=steps_per_epoch * flags_obj.num_epochs_per_decay,
decay_rate=flags_obj.lr_decay_factor,
staircase=True),
momentum=0.9)
optimizer = performance.configure_optimizer(
optimizer,
use_float16=flags_core.get_tf_dtype(flags_obj) == tf.float16,
loss_scale=flags_core.get_loss_scale(flags_obj, default_for_fp16=128),)
# TODO(hongkuny): Remove trivial model usage and move it to benchmark.
if flags_obj.use_trivial_model:
model = test_utils.trivial_model(imagenet_preprocessing.NUM_CLASSES)
elif flags_obj.model == 'resnet50_v1.5':
model = resnet_model.resnet50(
num_classes=imagenet_preprocessing.NUM_CLASSES)
    elif flags_obj.model in ('mobilenet', 'mobilenet_pretrained'):
# TODO(kimjaehong): Remove layers attribute when minimum TF version
# support 2.0 layers by default.
if flags_obj.model == 'mobilenet_pretrained':
classes_labels = 1000
initial_weights = 'imagenet'
else:
classes_labels = imagenet_preprocessing.NUM_CLASSES
initial_weights = None
model = tf.keras.applications.mobilenet.MobileNet(
weights=initial_weights,
classes=classes_labels,
layers=tf.keras.layers)
if flags_obj.pretrained_filepath:
model.load_weights(flags_obj.pretrained_filepath)
if flags_obj.pruning_method == 'polynomial_decay':
import tensorflow_model_optimization as tfmot # pylint: disable=g-import-not-at-top
if dtype != tf.float32:
raise NotImplementedError(
'Pruning is currently only supported on dtype=tf.float32.')
pruning_params = {
'pruning_schedule':
tfmot.sparsity.keras.PolynomialDecay(
initial_sparsity=flags_obj.pruning_initial_sparsity,
final_sparsity=flags_obj.pruning_final_sparsity,
begin_step=flags_obj.pruning_begin_step,
end_step=flags_obj.pruning_end_step,
frequency=flags_obj.pruning_frequency),
}
model = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params)
elif flags_obj.pruning_method:
raise NotImplementedError('Only polynomial_decay is currently supported.')
if flags_obj.clustering_method == 'selective_clustering':
import tensorflow_model_optimization as tfmot # pylint: disable=g-import-not-at-top
if dtype != tf.float32:
raise NotImplementedError(
'Clustering is currently only supported on dtype=tf.float32.')
model = _cluster_last_three_conv2d_layers(model)
elif flags_obj.clustering_method:
raise NotImplementedError(
'Only selective_clustering is implemented.')
model.compile(
loss='sparse_categorical_crossentropy',
optimizer=optimizer,
metrics=(['sparse_categorical_accuracy']
if flags_obj.report_accuracy_metrics else None),
run_eagerly=flags_obj.run_eagerly)
train_epochs = flags_obj.train_epochs
callbacks = common.get_callbacks(
pruning_method=flags_obj.pruning_method,
enable_checkpoint_and_export=flags_obj.enable_checkpoint_and_export,
model_dir=flags_obj.model_dir)
  # If running multiple epochs, ignore the train_steps flag.
if train_epochs <= 1 and flags_obj.train_steps:
steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
train_epochs = 1
num_eval_steps = (
imagenet_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size)
validation_data = eval_input_dataset
if flags_obj.skip_eval:
# Only build the training graph. This reduces memory usage introduced by
# control flow ops in layers that have different implementations for
# training and inference (e.g., batch norm).
if flags_obj.set_learning_phase_to_train:
# TODO(haoyuzhang): Understand slowdown of setting learning phase when
# not using distribution strategy.
tf.keras.backend.set_learning_phase(1)
num_eval_steps = None
validation_data = None
if not strategy and flags_obj.explicit_gpu_placement:
# TODO(b/135607227): Add device scope automatically in Keras training loop
# when not using distribution strategy.
no_dist_strat_device = tf.device('/device:GPU:0')
no_dist_strat_device.__enter__()
history = model.fit(train_input_dataset,
epochs=train_epochs,
steps_per_epoch=steps_per_epoch,
callbacks=callbacks,
validation_steps=num_eval_steps,
validation_data=validation_data,
validation_freq=flags_obj.epochs_between_evals,
verbose=2)
eval_output = None
if not flags_obj.skip_eval:
eval_output = model.evaluate(eval_input_dataset,
steps=num_eval_steps,
verbose=2)
if flags_obj.pruning_method:
model = tfmot.sparsity.keras.strip_pruning(model)
if flags_obj.clustering_method:
model = tfmot.clustering.keras.strip_clustering(model)
if flags_obj.enable_checkpoint_and_export:
if dtype == tf.bfloat16:
logging.warning('Keras model.save does not support bfloat16 dtype.')
else:
      # Keras model.save assumes a float32 input signature.
export_path = os.path.join(flags_obj.model_dir, 'saved_model')
model.save(export_path, include_optimizer=False)
if not strategy and flags_obj.explicit_gpu_placement:
no_dist_strat_device.__exit__()
stats = common.build_stats(history, eval_output, callbacks)
return stats
def define_imagenet_keras_flags():
common.define_keras_flags(
model=True,
optimizer=True,
pretrained_filepath=True)
common.define_pruning_flags()
common.define_clustering_flags()
flags_core.set_defaults()
flags.adopt_module_key_flags(common)
def main(_):
model_helpers.apply_clean(flags.FLAGS)
stats = run(flags.FLAGS)
logging.info('Run stats:\n%s', stats)
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
define_imagenet_keras_flags()
app.run(main)
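# Example invocation (illustrative only; the module path is inferred from the
# test imports elsewhere in this change, and the flag values are arbitrary):
#
#   python3 -m official.benchmark.models.resnet_imagenet_main \
#       --model=mobilenet_pretrained --optimizer=mobilenet_fine_tune \
#       --clustering_method=selective_clustering \
#       --use_synthetic_data=true --batch_size=32 --train_epochs=1 \
#       --model_dir=/tmp/mobilenet_clustering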
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with ImageNet data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import tensorflow as tf
from tensorflow.python.eager import context
from official.benchmark.models import resnet_imagenet_main
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.utils.testing import integration
@parameterized.parameters(
"resnet",
# "resnet_polynomial_decay", b/151854314
"mobilenet",
# "mobilenet_polynomial_decay", b/151854314
"mobilenet_selective_clustering",
)
class KerasImagenetTest(tf.test.TestCase, parameterized.TestCase):
"""Unit tests for Keras Models with ImageNet."""
_default_flags_dict = [
"-batch_size",
"4",
"-train_steps",
"1",
"-use_synthetic_data",
"true",
"-data_format",
"channels_last",
]
_extra_flags_dict = {
"resnet": [
"-model",
"resnet50_v1.5",
"-optimizer",
"resnet50_default",
],
"resnet_polynomial_decay": [
"-model",
"resnet50_v1.5",
"-optimizer",
"resnet50_default",
"-pruning_method",
"polynomial_decay",
],
"mobilenet": [
"-model",
"mobilenet",
"-optimizer",
"mobilenet_default",
],
"mobilenet_polynomial_decay": [
"-model",
"mobilenet",
"-optimizer",
"mobilenet_default",
"-pruning_method",
"polynomial_decay",
],
"mobilenet_selective_clustering": [
"-model", "mobilenet_pretrained",
"-optimizer", "mobilenet_fine_tune",
"-clustering_method", "selective_clustering",
]
}
_tempdir = None
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(KerasImagenetTest, cls).setUpClass()
resnet_imagenet_main.define_imagenet_keras_flags()
def setUp(self):
super(KerasImagenetTest, self).setUp()
imagenet_preprocessing.NUM_IMAGES["validation"] = 4
self.policy = tf.keras.mixed_precision.global_policy()
def tearDown(self):
super(KerasImagenetTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
tf.keras.mixed_precision.set_global_policy(self.policy)
def get_extra_flags_dict(self, flags_key):
return self._extra_flags_dict[flags_key] + self._default_flags_dict
def test_end_to_end_no_dist_strat(self, flags_key):
"""Test Keras model with 1 GPU, no distribution strategy."""
extra_flags = [
"-distribution_strategy",
"off",
]
extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_graph_no_dist_strat(self, flags_key):
"""Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
extra_flags = [
"-enable_eager",
"false",
"-distribution_strategy",
"off",
]
extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_1_gpu(self, flags_key):
"""Test Keras model with 1 GPU."""
if context.num_gpus() < 1:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(1, context.num_gpus()))
extra_flags = [
"-num_gpus",
"1",
"-distribution_strategy",
"mirrored",
"-enable_checkpoint_and_export",
"1",
]
extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_1_gpu_fp16(self, flags_key):
"""Test Keras model with 1 GPU and fp16."""
if context.num_gpus() < 1:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(1, context.num_gpus()))
extra_flags = [
"-num_gpus",
"1",
"-dtype",
"fp16",
"-distribution_strategy",
"mirrored",
]
extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
if "polynomial_decay" in extra_flags:
self.skipTest("Pruning with fp16 is currently not supported.")
if "selective_clustering" in extra_flags:
self.skipTest("Clustering with fp16 is currently not supported.")
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_2_gpu(self, flags_key):
"""Test Keras model with 2 GPUs."""
if context.num_gpus() < 2:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(2, context.num_gpus()))
extra_flags = [
"-num_gpus",
"2",
"-distribution_strategy",
"mirrored",
]
extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_xla_2_gpu(self, flags_key):
"""Test Keras model with XLA and 2 GPUs."""
if context.num_gpus() < 2:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(2, context.num_gpus()))
extra_flags = [
"-num_gpus",
"2",
"-enable_xla",
"true",
"-distribution_strategy",
"mirrored",
]
extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_2_gpu_fp16(self, flags_key):
"""Test Keras model with 2 GPUs and fp16."""
if context.num_gpus() < 2:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(2, context.num_gpus()))
extra_flags = [
"-num_gpus",
"2",
"-dtype",
"fp16",
"-distribution_strategy",
"mirrored",
]
extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
if "polynomial_decay" in extra_flags:
self.skipTest("Pruning with fp16 is currently not supported.")
if "selective_clustering" in extra_flags:
self.skipTest("Clustering with fp16 is currently not supported.")
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
def test_end_to_end_xla_2_gpu_fp16(self, flags_key):
"""Test Keras model with XLA, 2 GPUs and fp16."""
if context.num_gpus() < 2:
self.skipTest(
"{} GPUs are not available for this test. {} GPUs are available"
.format(2, context.num_gpus()))
extra_flags = [
"-num_gpus",
"2",
"-dtype",
"fp16",
"-enable_xla",
"true",
"-distribution_strategy",
"mirrored",
]
extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
if "polynomial_decay" in extra_flags:
self.skipTest("Pruning with fp16 is currently not supported.")
if "selective_clustering" in extra_flags:
self.skipTest("Clustering with fp16 is currently not supported.")
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
if __name__ == "__main__":
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test the keras ResNet model with ImageNet data on TPU."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import tensorflow as tf
from official.benchmark.models import resnet_imagenet_main
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.utils.testing import integration
class KerasImagenetTest(tf.test.TestCase, parameterized.TestCase):
"""Unit tests for Keras Models with ImageNet."""
_extra_flags_dict = {
"resnet": [
"-batch_size",
"4",
"-train_steps",
"1",
"-use_synthetic_data",
"true"
"-model",
"resnet50_v1.5",
"-optimizer",
"resnet50_default",
],
"resnet_polynomial_decay": [
"-batch_size",
"4",
"-train_steps",
"1",
"-use_synthetic_data",
"true",
"-model",
"resnet50_v1.5",
"-optimizer",
"resnet50_default",
"-pruning_method",
"polynomial_decay",
],
}
_tempdir = None
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(KerasImagenetTest, cls).setUpClass()
resnet_imagenet_main.define_imagenet_keras_flags()
def setUp(self):
super(KerasImagenetTest, self).setUp()
imagenet_preprocessing.NUM_IMAGES["validation"] = 4
self.policy = tf.keras.mixed_precision.global_policy()
def tearDown(self):
super(KerasImagenetTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
tf.keras.mixed_precision.set_global_policy(self.policy)
@parameterized.parameters([
"resnet",
# "resnet_polynomial_decay" b/151854314
])
def test_end_to_end_tpu(self, flags_key):
"""Test Keras model with TPU distribution strategy."""
extra_flags = [
"-distribution_strategy",
"tpu",
"-data_format",
"channels_last",
"-enable_checkpoint_and_export",
"1",
]
extra_flags = extra_flags + self._extra_flags_dict[flags_key]
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
@parameterized.parameters(["resnet"])
def test_end_to_end_tpu_bf16(self, flags_key):
"""Test Keras model with TPU and bfloat16 activation."""
extra_flags = [
"-distribution_strategy",
"tpu",
"-data_format",
"channels_last",
"-dtype",
"bf16",
]
extra_flags = extra_flags + self._extra_flags_dict[flags_key]
integration.run_synthetic(
main=resnet_imagenet_main.run,
tmp_root=self.get_temp_dir(),
extra_flags=extra_flags)
if __name__ == "__main__":
tf.test.main()
# Shakespeare character LSTM model
This is an implementation of a simple character LSTM used to generate text.
## Instructions
First download the source data:
```
wget https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
```
Note that files other than shakespeare.txt can also be used to train the model to generate other text.
Then train the model:
```
python3 shakespeare_main.py --training_data shakespeare.txt \
--model_dir /tmp/shakespeare
```
This will place model checkpoints in `/tmp/shakespeare`, so that we can use them to make predictions.
Then generate predictions:
```
python3 shakespeare_main.py --training_data shakespeare.txt \
--model_dir /tmp/shakespeare --notrain --predict_context=ROMEO:
```
Change `--predict_context` and `--predict_length` to suit your needs.
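For example, to generate a longer passage primed with a different context (the flag values below are only illustrative):
```
python3 shakespeare_main.py --training_data shakespeare.txt \
    --model_dir /tmp/shakespeare --notrain \
    --predict_context=JULIET: --predict_length=2000
```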
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a character LSTM model trained on Shakespeare."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
# pylint: disable=wrong-import-order
from absl import app
from absl import flags
import numpy as np
import tensorflow as tf
from official.common import distribute_utils
# pylint: enable=wrong-import-order
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
EMBEDDING_DIM = 256
RNN_UNITS = 1024
SEQ_LENGTH = 100
# Calculated by running with batch_size=1; this is roughly the number of
# (SEQ_LENGTH + 1)-character chunks in shakespeare.txt.
BATCHES_PER_EPOCH = 11043
def define_flags():
"""Define the flags for the Shakespeare character LSTM."""
flags_core.define_base(data_dir=False,
clean=False,
train_epochs=True,
epochs_between_evals=False,
stop_threshold=False,
num_gpu=True,
export_dir=False,
run_eagerly=True,
distribution_strategy=True)
flags_core.define_performance(num_parallel_calls=False,
inter_op=False,
intra_op=False,
synthetic_data=False,
max_train_steps=False,
dtype=True,
enable_xla=True)
flags_core.set_defaults(train_epochs=43,
batch_size=64)
flags.DEFINE_boolean(name='enable_eager', default=True, help='Enable eager?')
flags.DEFINE_boolean(
name='train', default=True,
help='If true trains the model.')
flags.DEFINE_string(
name='predict_context', default=None,
help='If set, makes a prediction with the given context.')
flags.DEFINE_integer(
name='predict_length', default=1000,
help='Length of the predicted text including the context.')
flags.DEFINE_integer(name='train_steps', default=None,
help='Overrides train_steps per epoch if not None.')
flags.DEFINE_integer(
name='log_steps', default=100,
help='For every log_steps, we log the timing information such as '
'examples per second.')
flags.DEFINE_string(
name='training_data', default=None,
help='Path to file containing the training data.')
flags.DEFINE_boolean(name='cudnn', default=True, help='Use CuDNN LSTM.')
def get_dataset(path_to_file, batch_size=None, seq_length=SEQ_LENGTH):
"""Creates a dataset from a given text file.
Args:
path_to_file: The path to the training data.
batch_size: Batch size to use.
seq_length: The length of the LSTM sequence.
Returns:
A tuple, consisting of the Dataset and the class to character mapping
and character to class mapping.
"""
with tf.io.gfile.GFile(path_to_file, 'rb') as train_data:
text = train_data.read().decode(encoding='utf-8')
# Create vocab
vocab = sorted(set(text))
char2idx = {u: i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)
  # Split text into (seq_length + 1)-character chunks to create examples.
text_as_int = np.array([char2idx[c] for c in text])
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(
seq_length + 1, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
def split_input_target(chunk):
input_text = chunk[:-1]
target_text = chunk[1:]
return input_text, tf.one_hot(target_text, len(vocab))
dataset = sequences.map(split_input_target)
dataset = dataset.shuffle(10000).repeat()
dataset = dataset.batch(
batch_size, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
return dataset, idx2char, char2idx
def build_model(vocab_size,
embedding_dim=EMBEDDING_DIM,
rnn_units=RNN_UNITS,
batch_size=None,
stateful=False,
use_cudnn=True):
"""Builds the Shakespeare model.
Args:
vocab_size: The number of character classes in the input.
embedding_dim: The dimension of the embedding space for each class.
rnn_units: The number of RNN units in the layer.
batch_size: When predicting, the batch size of the predictions.
    stateful: If true, the LSTM is stateful.
    use_cudnn: Whether the layer configuration should allow the CuDNN kernel
      to be used.
Returns:
A Keras Model.
"""
LSTM = functools.partial(tf.keras.layers.LSTM, implementation=2)
# By indirecting the activation through a lambda layer, the logic to dispatch
# to CuDNN in V2 doesn't trigger and we force the LSTM to run in non-CuDNN
# mode.
lstm_activation = ('tanh' if use_cudnn else
lambda x: tf.math.tanh(x))
batch_shape = [batch_size if stateful else None, None]
return tf.keras.Sequential([
tf.keras.layers.Embedding(vocab_size, embedding_dim,
batch_input_shape=batch_shape),
LSTM(rnn_units,
activation=lstm_activation,
return_sequences=True,
stateful=stateful,
recurrent_initializer='glorot_uniform'),
tf.keras.layers.Dense(vocab_size),
tf.keras.layers.Softmax(dtype=tf.float32)])
def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
"""Trains a Shakespeare model.
Args:
    flags_obj: An object containing parsed flag values.
dataset: the training data set.
vocab_size: the number of unique character classes.
strategy: distribution strategy to use.
checkpoint_dir: if not None, the directory in which to make checkpoints.
Returns:
The training history and callbacks.
"""
if flags_obj.train_steps:
train_steps = flags_obj.train_steps
else:
train_steps = BATCHES_PER_EPOCH // flags_obj.batch_size
strategy_scope = distribute_utils.get_strategy_scope(strategy)
with strategy_scope:
model = build_model(vocab_size=vocab_size, batch_size=flags_obj.batch_size,
use_cudnn=flags_obj.cudnn)
# Model.fit() automatically applies loss scaling so we don't need to create
# a LossScaleOptimizer.
model.compile(
optimizer=tf.keras.optimizers.Adam(),
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=[tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
run_eagerly=flags_obj.run_eagerly)
callbacks = []
if checkpoint_dir:
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath=checkpoint_prefix,
save_weights_only=True)
callbacks.append(checkpoint_callback)
time_callback = keras_utils.TimeHistory(flags_obj.batch_size,
flags_obj.log_steps)
callbacks.append(time_callback)
history = model.fit(dataset,
epochs=flags_obj.train_epochs,
steps_per_epoch=train_steps,
callbacks=callbacks,
verbose=2)
return history, callbacks
def make_prediction(checkpoint_dir, length, context, idx2char, char2idx):
"""Make predictions from a Shakespeare model.
Args:
checkpoint_dir: the directory from which to load checkpoints
length: the total length of the generated text (including the context).
context: the initial text with which the LSTM is primed.
idx2char: the character class to character mapping.
char2idx: the character to character class mapping.
Returns:
A generated string of text of the given length.
"""
prediction_model = build_model(
vocab_size=len(idx2char), batch_size=1, stateful=True)
prediction_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
prediction_model.build(tf.TensorShape([1, None]))
input_eval = [char2idx[s] for s in context]
input_eval = tf.expand_dims(input_eval, 0)
text_generated = []
prediction_model.reset_states()
for _ in range(length - len(context)):
predictions = prediction_model(input_eval)
predictions = tf.squeeze(predictions, 0)
# We applied a softmax to the output of the model so that
# tf.keras.metrics.Recall would work. We need logits for
# tf.random.categorical, so we convert the probabilities back to log odds
predictions = tf.math.log(predictions / (1 - predictions))
random_output = tf.random.categorical(predictions, num_samples=1)
selected_id = random_output[-1, 0].numpy()
input_eval = tf.expand_dims([selected_id], 0)
text_generated.append(idx2char[selected_id])
return context + ''.join(text_generated)
def run(flags_obj):
"""Run Shakespeare training and predict.
Args:
flags_obj: An object containing parsed flag values.
Returns:
Dictionary with status from the run.
"""
if not flags_obj.training_data:
raise ValueError(
        'Must set the path to a training data file. E.g., download the following '
'https://storage.googleapis.com/download.tensorflow.org/data/'
'shakespeare.txt')
if flags_obj.dtype == 'fp16':
tf.keras.mixed_precision.set_global_policy('mixed_float16')
keras_utils.set_session_config(
enable_xla=flags_obj.enable_xla)
strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=flags_obj.distribution_strategy,
num_gpus=flags_obj.num_gpus)
dataset, idx2char, char2idx = get_dataset(flags_obj.training_data,
batch_size=flags_obj.batch_size)
stats = {}
if flags_obj.train:
history, callbacks = train_model(flags_obj, dataset,
len(idx2char), strategy,
checkpoint_dir=flags_obj.model_dir)
stats['history'] = history.history
stats['callbacks'] = callbacks
if flags_obj.predict_context:
if not flags_obj.model_dir:
raise ValueError('Must set model_dir to get predictions.')
print(make_prediction(flags_obj.model_dir,
flags_obj.predict_length,
flags_obj.predict_context,
idx2char,
char2idx))
return stats
def main(_):
flags_obj = flags.FLAGS
run(flags_obj)
if __name__ == '__main__':
define_flags()
app.run(main)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions to generate data directly on devices."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import random
import string
from absl import logging
import tensorflow as tf
# The `SyntheticDataset` is a temporary solution for generating synthetic data
# directly on devices. It is only useful for Keras with Distribution
# Strategies. We will have better support in `tf.data` or Distribution Strategy
# later.
class SyntheticDataset(object):
"""A dataset that generates synthetic data on each device."""
def __init__(self, dataset, split_by=1):
    # dataset.take(1) doesn't have a GPU kernel.
with tf.device('device:CPU:0'):
tensor = tf.data.experimental.get_single_element(dataset.take(1))
flat_tensor = tf.nest.flatten(tensor)
variable_data = []
initializers = []
for t in flat_tensor:
rebatched_t = tf.split(t, num_or_size_splits=split_by, axis=0)[0]
assert rebatched_t.shape.is_fully_defined(), rebatched_t.shape
v = tf.compat.v1.get_local_variable(
self._random_name(), initializer=rebatched_t)
variable_data.append(v)
initializers.append(v.initializer)
input_data = tf.nest.pack_sequence_as(tensor, variable_data)
self._iterator = SyntheticIterator(input_data, initializers)
def _random_name(self, size=10, chars=string.ascii_uppercase + string.digits):
return ''.join(random.choice(chars) for _ in range(size))
def __iter__(self):
return self._iterator
def make_one_shot_iterator(self):
return self._iterator
def make_initializable_iterator(self):
return self._iterator
class SyntheticIterator(object):
"""A dataset that generates synthetic data on each device."""
def __init__(self, input_data, initializers):
self._input_data = input_data
self._initializers = initializers
def get_next(self):
return self._input_data
def next(self):
return self.__next__()
def __next__(self):
try:
return self.get_next()
except tf.errors.OutOfRangeError:
raise StopIteration
def initialize(self):
if tf.executing_eagerly():
return tf.no_op()
else:
return self._initializers
def _monkey_patch_dataset_method(strategy):
"""Monkey-patch `strategy`'s `make_dataset_iterator` method."""
def make_dataset(self, dataset):
logging.info('Using pure synthetic data.')
with self.scope():
if self.extended._global_batch_size: # pylint: disable=protected-access
return SyntheticDataset(dataset, self.num_replicas_in_sync)
else:
return SyntheticDataset(dataset)
def make_iterator(self, dataset):
dist_dataset = make_dataset(self, dataset)
return iter(dist_dataset)
strategy.orig_make_dataset_iterator = strategy.make_dataset_iterator
strategy.make_dataset_iterator = make_iterator
strategy.orig_distribute_dataset = strategy.experimental_distribute_dataset
strategy.experimental_distribute_dataset = make_dataset
def _undo_monkey_patch_dataset_method(strategy):
if hasattr(strategy, 'orig_make_dataset_iterator'):
strategy.make_dataset_iterator = strategy.orig_make_dataset_iterator
if hasattr(strategy, 'orig_distribute_dataset'):
    strategy.experimental_distribute_dataset = (
        strategy.orig_distribute_dataset)
def set_up_synthetic_data():
_monkey_patch_dataset_method(tf.distribute.OneDeviceStrategy)
_monkey_patch_dataset_method(tf.distribute.MirroredStrategy)
_monkey_patch_dataset_method(
tf.distribute.experimental.MultiWorkerMirroredStrategy)
def undo_set_up_synthetic_data():
_undo_monkey_patch_dataset_method(tf.distribute.OneDeviceStrategy)
_undo_monkey_patch_dataset_method(tf.distribute.MirroredStrategy)
_undo_monkey_patch_dataset_method(
tf.distribute.experimental.MultiWorkerMirroredStrategy)
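# Usage sketch (not part of the original module): once `set_up_synthetic_data`
# has patched the strategy classes, `experimental_distribute_dataset` on any
# MirroredStrategy instance returns a `SyntheticDataset` that replays a single
# cached batch on every step.
#
#   set_up_synthetic_data()
#   strategy = tf.distribute.MirroredStrategy()
#   synthetic_data = strategy.experimental_distribute_dataset(real_dataset)
#   ...  # run the benchmark against `synthetic_data`
#   undo_set_up_synthetic_data()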
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
from absl import flags
from absl import logging
from absl.testing import flagsaver
import tensorflow as tf
from official.benchmark import benchmark_wrappers
from official.benchmark import owner_utils
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
from official.recommendation import ncf_common
from official.recommendation import ncf_keras_main
from official.utils.flags import core
FLAGS = flags.FLAGS
NCF_DATA_DIR_NAME = 'movielens_data'
NCF_TF_REGRESSION_DATA_DIR_NAME = 'gs://tf-regression/ncf/data'
class NCFKerasBenchmarkBase(PerfZeroBenchmark):
"""Base class for NCF model benchmark."""
def __init__(self, output_dir=None, default_flags=None, **kwargs):
super(NCFKerasBenchmarkBase, self).__init__(output_dir, default_flags,
**kwargs)
# Run all benchmarks with ml_perf flag.
self.default_flags['ml_perf'] = True
def _setup(self):
"""Sets up and resets flags before each test."""
logging.set_verbosity(logging.INFO)
if NCFKerasBenchmarkBase.local_flags is None:
ncf_common.define_ncf_flags()
# Loads flags to get defaults to then override. List cannot be empty.
flags.FLAGS(['foo'])
core.set_defaults(**self.default_flags)
saved_flag_values = flagsaver.save_flag_values()
NCFKerasBenchmarkBase.local_flags = saved_flag_values
else:
flagsaver.restore_flag_values(NCFKerasBenchmarkBase.local_flags)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self, hr_at_10_min=0, hr_at_10_max=0):
start_time_sec = time.time()
stats = ncf_keras_main.run_ncf(FLAGS)
wall_time_sec = time.time() - start_time_sec
metrics = []
metrics.append({
'name': 'exp_per_second',
'value': stats['avg_exp_per_second']
})
if hr_at_10_min > 0:
metrics.append({
'name': 'hr_at_10',
'value': stats['eval_hit_rate'],
'min_value': hr_at_10_min,
'max_value': hr_at_10_max
})
metrics.append({'name': 'train_loss', 'value': stats['loss']})
self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)
class NCFKerasAccuracy(NCFKerasBenchmarkBase):
"""Benchmark NCF model using real data."""
def __init__(self,
output_dir=None,
root_data_dir=None,
default_flags=None,
**kwargs):
root_data_dir = root_data_dir if root_data_dir else ''
default_flags = {}
default_flags['dataset'] = 'ml-20m'
default_flags['num_gpus'] = 1
default_flags['train_epochs'] = 10
default_flags['clean'] = True
default_flags['batch_size'] = 99000
default_flags['learning_rate'] = 0.00382059
default_flags['beta1'] = 0.783529
default_flags['beta2'] = 0.909003
default_flags['epsilon'] = 1.45439e-07
default_flags['layers'] = [256, 256, 128, 64]
default_flags['num_factors'] = 64
default_flags['hr_threshold'] = 0.635
default_flags['ml_perf'] = True
default_flags['use_synthetic_data'] = False
default_flags['data_dir'] = os.path.join(root_data_dir, NCF_DATA_DIR_NAME)
super(NCFKerasAccuracy, self).__init__(
output_dir=output_dir, default_flags=default_flags, **kwargs)
def _run_and_report_benchmark_mlperf_like(self):
"""Run test and report results.
Note: MLPerf like tests are not tuned to hit a specific hr@10 value, but
we want it recorded.
"""
self._run_and_report_benchmark(hr_at_10_min=0.61)
def _run_and_report_benchmark(self, hr_at_10_min=0.630, hr_at_10_max=0.645):
"""Run test and report results.
Note: Target is 0.635, but some runs are below that level. Until we have
multi-run tests, we have to accept a lower target.
Args:
hr_at_10_min: Minimum acceptable hr@10 value.
hr_at_10_max: Maximum acceptable hr@10 value.
"""
super(NCFKerasAccuracy, self)._run_and_report_benchmark(
hr_at_10_min=hr_at_10_min, hr_at_10_max=hr_at_10_max)
def _set_8_gpu_defaults(self):
FLAGS.num_gpus = 8
FLAGS.learning_rate = 0.0045
FLAGS.beta1 = 0.25
FLAGS.beta2 = 0.5
FLAGS.epsilon = 1e-8
FLAGS.train_epochs = 14
FLAGS.batch_size = 99000
FLAGS.eval_batch_size = 160000
FLAGS.train_dataset_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME,
'training_cycle_*/*')
FLAGS.eval_dataset_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME,
'eval_data/*')
FLAGS.input_meta_data_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME,
'metadata')
FLAGS.data_dir = NCF_TF_REGRESSION_DATA_DIR_NAME
def benchmark_1_gpu_early_stop(self):
self._setup()
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_early_stop(self):
self._setup()
FLAGS.distribution_strategy = 'off'
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
self._setup()
FLAGS.distribution_strategy = 'off'
FLAGS.early_stopping = True
FLAGS.run_eagerly = True
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_early_stop(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_1_gpu_ctl_early_stop(self):
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_ctl_run_eagerly_early_stop(self):
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
FLAGS.run_eagerly = True
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_ctl_early_stop(self):
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_2_gpus_early_stop(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.num_gpus = 2
FLAGS.eval_batch_size = 160000
self._run_and_report_benchmark()
def benchmark_2_gpus_ctl_early_stop(self):
"""NCF with custom training loop. Works only in TF 2.0."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
FLAGS.num_gpus = 2
FLAGS.eval_batch_size = 160000
self._run_and_report_benchmark()
#############################################
# Tests below with mlperf in the test name are of two types:
# 1) 1 GPU tests are based on MLPerf 0.5 and the TensorFlow pulled submission.
# 2) 8 GPU tests are based on MLPerf 0.5 and use NVIDIA's hyper parameters.
#
  # The purpose of both is to get a number to compare to existing results. To
  # do this, the number of epochs is held constant rather than racing to a
  # given accuracy. The accuracy validation is done by the "early_stop" tests.
#############################################
def benchmark_1_gpu_mlperf_like(self):
"""1 GPU using keras fit/compile."""
self._setup()
FLAGS.train_epochs = 7
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
"""1 GPU using compile/fit without dist_strat."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_no_dist_strat_run_eagerly_mlperf_like(self):
self._setup()
FLAGS.train_epochs = 7
FLAGS.distribution_strategy = 'off'
FLAGS.run_eagerly = True
self._run_and_report_benchmark_mlperf_like()
def benchmark_xla_1_gpu_mlperf_like(self):
"""1 GPU using compile/fit with XLA."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.enable_xla = True
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_mlperf_like(self):
"""1 GPU using CTL."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.train_epochs = 7
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_fp16_mlperf_like(self):
"""1 GPU using CTL and FP16."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_fp16_mlperf_like(self):
"""1 GPU using FP16."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
"""1 GPU using CTL with eager and distribution strategy."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.run_eagerly = True
FLAGS.train_epochs = 7
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_ctl_mlperf_like(self):
"""1 GPU using CTL with XLA."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.enable_xla = True
FLAGS.train_epochs = 7
self._run_and_report_benchmark_mlperf_like()
def benchmark_xla_1_gpu_fp16_mlperf_like(self):
"""1 GPU using with XLA and FP16."""
self._setup()
FLAGS.enable_xla = True
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_xla_1_gpu_ctl_fp16_mlperf_like(self):
"""1 GPU using CTL with XLA and FP16."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.enable_xla = True
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_mlperf_like(self):
"""8 GPU using keras fit/compile."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.train_epochs = 17
FLAGS.batch_size = 1048576
FLAGS.eval_batch_size = 160000
FLAGS.learning_rate = 0.0045
FLAGS.beta1 = 0.25
FLAGS.beta2 = 0.5
FLAGS.epsilon = 1e-8
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_ctl_mlperf_like(self):
"""8 GPU using CTL."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.num_gpus = 8
FLAGS.train_epochs = 17
FLAGS.batch_size = 1048576
FLAGS.eval_batch_size = 160000
FLAGS.learning_rate = 0.0045
FLAGS.beta1 = 0.25
FLAGS.beta2 = 0.5
FLAGS.epsilon = 1e-8
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_tf_data_ctl_mlperf_like(self):
"""8 GPU using CTL."""
self._setup()
self._set_8_gpu_defaults()
FLAGS.keras_use_ctl = True
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_tf_data_fp16_mlperf_like(self):
"""8 GPU FP16."""
self._setup()
self._set_8_gpu_defaults()
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_tf_data_ctl_fp16_mlperf_like(self):
"""8 GPU FP16 using CTL."""
self._setup()
self._set_8_gpu_defaults()
FLAGS.keras_use_ctl = True
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
class NCFKerasBenchmarkReal(NCFKerasBenchmarkBase):
"""NCF Keras throughput benchmarks."""
def __init__(self,
output_dir=None,
root_data_dir=None,
default_flags=None,
**kwargs):
root_data_dir = root_data_dir if root_data_dir else ''
default_flags = {}
default_flags['dataset'] = 'ml-20m'
default_flags['num_gpus'] = 1
default_flags['train_epochs'] = 14
default_flags['clean'] = True
default_flags['batch_size'] = 99000
default_flags['eval_batch_size'] = 160000
default_flags['learning_rate'] = 0.00382059
default_flags['beta1'] = 0.783529
default_flags['beta2'] = 0.909003
default_flags['epsilon'] = 1.45439e-07
default_flags['layers'] = [256, 256, 128, 64]
default_flags['num_factors'] = 64
default_flags['hr_threshold'] = 0.635
default_flags['ml_perf'] = True
default_flags['use_synthetic_data'] = False
default_flags['train_dataset_path'] = os.path.join(
NCF_TF_REGRESSION_DATA_DIR_NAME, 'training_cycle_*/*')
default_flags['eval_dataset_path'] = os.path.join(
NCF_TF_REGRESSION_DATA_DIR_NAME, 'eval_data/*')
default_flags['input_meta_data_path'] = os.path.join(
NCF_TF_REGRESSION_DATA_DIR_NAME, 'metadata')
default_flags['data_dir'] = NCF_TF_REGRESSION_DATA_DIR_NAME
super(NCFKerasBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=default_flags, **kwargs)
def benchmark_2x2_tpu(self):
"""2x2 TPU using CTL with distribution strategy."""
self._setup()
FLAGS.distribution_strategy = 'tpu'
FLAGS.keras_use_ctl = True
FLAGS.num_gpus = 0
FLAGS.train_epochs = 1
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_2x2_tpu_mlir(self):
"""2x2 TPU using CTL with distribution strategy using the MLIR bridge."""
self._setup()
FLAGS.distribution_strategy = 'tpu'
FLAGS.keras_use_ctl = True
FLAGS.num_gpus = 0
FLAGS.train_epochs = 1
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark()
class NCFKerasSynth(NCFKerasBenchmarkBase):
"""Benchmark NCF model using synthetic data."""
def __init__(self, output_dir=None, default_flags=None, **kwargs):
default_flags = {}
default_flags['dataset'] = 'ml-20m'
default_flags['num_gpus'] = 1
default_flags['train_epochs'] = 8
default_flags['batch_size'] = 99000
default_flags['eval_batch_size'] = 160000
default_flags['learning_rate'] = 0.00382059
default_flags['beta1'] = 0.783529
default_flags['beta2'] = 0.909003
default_flags['epsilon'] = 1.45439e-07
default_flags['layers'] = [256, 256, 128, 64]
default_flags['num_factors'] = 64
default_flags['hr_threshold'] = 0.635
default_flags['use_synthetic_data'] = True
super(NCFKerasSynth, self).__init__(
output_dir=output_dir, default_flags=default_flags, **kwargs)
def benchmark_1_gpu(self):
self._setup()
self._run_and_report_benchmark()
def benchmark_2_gpus(self):
self._setup()
FLAGS.num_gpus = 2
self._run_and_report_benchmark()
if __name__ == '__main__':
tf.test.main()
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes benchmark testing for bert pretraining."""
# pylint: disable=line-too-long
from __future__ import print_function
import time
from typing import Optional
from absl import flags
import tensorflow as tf
from official.benchmark import benchmark_wrappers
from official.benchmark import owner_utils
from official.benchmark import perfzero_benchmark
from official.projects.nhnet import trainer
from official.utils.flags import core as flags_core
MIN_LOSS = 0.40
MAX_LOSS = 0.55
NHNET_DATA = 'gs://tf-perfzero-data/nhnet/v1/processed/train.tfrecord*'
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12/bert_model.ckpt'
FLAGS = flags.FLAGS
class NHNetBenchmark(perfzero_benchmark.PerfZeroBenchmark):
"""Base benchmark class for NHNet."""
def __init__(self, output_dir=None, default_flags=None, tpu=None, **kwargs):
self.default_flags = default_flags or {}
flag_methods = trainer.define_flags()
super(NHNetBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
flag_methods=flag_methods,
tpu=tpu,
**kwargs)
def _report_benchmark(self,
stats,
wall_time_sec,
max_value=None,
min_value=None):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from keras models with known entries.
wall_time_sec: the duration of the benchmark execution in seconds.
max_value: highest passing level.
min_value: lowest passing level.
"""
metrics = []
metrics.append({
'name': 'training_loss',
'value': stats['training_loss'],
'min_value': min_value,
'max_value': max_value
})
# These metrics are placeholders to avoid PerfZero failure.
metrics.append({
'name': 'exp_per_second',
'value': 0.0,
})
metrics.append({
'name': 'startup_time',
'value': 9999.,
})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=-1,
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class NHNetAccuracyBenchmark(NHNetBenchmark):
"""Benchmark accuracy tests for NHNet."""
def __init__(self,
output_dir: Optional[str] = None,
tpu: Optional[str] = None,
**kwargs):
default_flags = dict(
mode='train',
train_file_pattern=NHNET_DATA,
train_batch_size=1024,
model_type='nhnet',
len_title=15,
len_passage=200,
num_encoder_layers=12,
num_decoder_layers=12,
num_nhnet_articles=5,
steps_per_loop=1000,
params_override='init_from_bert2bert=false')
super(NHNetAccuracyBenchmark, self).__init__(
output_dir=output_dir, default_flags=default_flags, tpu=tpu, **kwargs)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self, max_value=MAX_LOSS, min_value=MIN_LOSS):
"""Runs and reports the benchmark given the provided configuration."""
start_time_sec = time.time()
stats = trainer.run()
wall_time_sec = time.time() - start_time_sec
self._report_benchmark(
stats, wall_time_sec, max_value=max_value, min_value=min_value)
@owner_utils.Owner('tf-model-garden')
def benchmark_accuracy_4x4_tpu_f32_50k_steps(self):
"""Test bert pretraining with 4x4 TPU for 50k steps."""
# This is used for accuracy test.
self._setup()
FLAGS.train_steps = 50000
FLAGS.checkpoint_interval = FLAGS.train_steps
FLAGS.distribution_strategy = 'tpu'
FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH
FLAGS.model_dir = self._get_model_dir(
'benchmark_accuracy_4x4_tpu_bf32_50k_steps')
self._run_and_report_benchmark()
@owner_utils.Owner('tf-model-garden')
def benchmark_accuracy_4x4_tpu_f32_1k_steps(self):
"""Test bert pretraining with 4x4 TPU for 1k steps."""
self._setup()
FLAGS.train_steps = 1000
FLAGS.checkpoint_interval = FLAGS.train_steps
FLAGS.distribution_strategy = 'tpu'
FLAGS.model_dir = self._get_model_dir(
'benchmark_accuracy_4x4_tpu_bf32_1k_steps')
self._run_and_report_benchmark()
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils to set Owner annotations on benchmarks.
@owner_utils.Owner('owner_team/user') can be set at the benchmark class level,
at the benchmark method level, or both.
Runner frameworks can use owner_utils.GetOwner(benchmark_method) to get the
actual owner. Python inheritance for the owner attribute is respected (e.g. a
method-level owner takes precedence over a class-level one).
See owner_utils_test for associated tests and more examples.
Simple example:
===============
class MLBenchmark:
@Owner('example_id')
def benchmark_method_1_gpu(self):
return True
"""
def Owner(owner_name):
"""Sets the owner attribute on a decorated method or class."""
def _Wrapper(func_or_class):
"""Sets the benchmark owner attribute."""
func_or_class.__benchmark__owner__ = owner_name
return func_or_class
return _Wrapper
def GetOwner(benchmark_method_or_class):
"""Gets the inherited owner attribute for this benchmark.
Checks for the existence of __benchmark__owner__ on the method or class. If
it's not present on a bound method, looks for it on the method's bound
instance, which inherits any class-level owner.
Args:
benchmark_method_or_class: A benchmark method or class.
Returns:
The associated owner string if present, otherwise None.
"""
if hasattr(benchmark_method_or_class, '__benchmark__owner__'):
return benchmark_method_or_class.__benchmark__owner__
elif hasattr(benchmark_method_or_class, '__self__'):
if hasattr(benchmark_method_or_class.__self__, '__benchmark__owner__'):
return benchmark_method_or_class.__self__.__benchmark__owner__
return None
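# A minimal usage sketch (illustrative only; the class and owner names are
# hypothetical), mirroring the docstring example above:
#
#   @Owner('team_alpha')
#   class MLBenchmark:
#
#     @Owner('user_bravo')
#     def benchmark_method_1_gpu(self):
#       return True
#
#   GetOwner(MLBenchmark)                            # -> 'team_alpha'
#   GetOwner(MLBenchmark().benchmark_method_1_gpu)   # -> 'user_bravo'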
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for official.benchmark.owner_utils."""
from absl.testing import absltest
from official.benchmark import owner_utils
@owner_utils.Owner('static_owner')
def static_function(foo=5):
return foo
def static_function_without_owner(foo=5):
return foo
class BenchmarkClassWithoutOwner:
def method_without_owner(self):
return 100
@owner_utils.Owner('method_owner')
def method_with_owner(self):
return 200
@owner_utils.Owner('class_owner')
class SomeBenchmarkClass:
def method_inherited_owner(self):
return 123
@owner_utils.Owner('method_owner')
def method_override_owner(self):
return 345
@owner_utils.Owner('new_class_owner')
class InheritedClass(SomeBenchmarkClass):
def method_inherited_owner(self):
return 456
@owner_utils.Owner('new_method_owner')
def method_override_owner(self):
return 567
class OwnerUtilsTest(absltest.TestCase):
"""Tests to assert for owner decorator functionality."""
def test_owner_tag_missing(self):
self.assertEqual(None, owner_utils.GetOwner(static_function_without_owner))
benchmark_class = BenchmarkClassWithoutOwner()
self.assertEqual(None,
owner_utils.GetOwner(benchmark_class.method_without_owner))
self.assertEqual(100, benchmark_class.method_without_owner())
self.assertEqual('method_owner',
owner_utils.GetOwner(benchmark_class.method_with_owner))
self.assertEqual(200, benchmark_class.method_with_owner())
def test_owner_attributes_static(self):
self.assertEqual('static_owner', owner_utils.GetOwner(static_function))
self.assertEqual(5, static_function(5))
def test_owner_attributes_per_class(self):
level1 = SomeBenchmarkClass()
self.assertEqual('class_owner',
owner_utils.GetOwner(level1.method_inherited_owner))
self.assertEqual(123, level1.method_inherited_owner())
self.assertEqual('method_owner',
owner_utils.GetOwner(level1.method_override_owner))
self.assertEqual(345, level1.method_override_owner())
def test_owner_attributes_inherited_class(self):
level2 = InheritedClass()
self.assertEqual('new_class_owner',
owner_utils.GetOwner(level2.method_inherited_owner))
self.assertEqual(456, level2.method_inherited_owner())
self.assertEqual('new_method_owner',
owner_utils.GetOwner(level2.method_override_owner))
self.assertEqual(567, level2.method_override_owner())
if __name__ == '__main__':
absltest.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils for creating PerfZero benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import flags
from absl import logging
from absl.testing import flagsaver
import tensorflow as tf
FLAGS = flags.FLAGS
class PerfZeroBenchmark(tf.test.Benchmark):
"""Common methods used in PerfZero Benchmarks.
Handles the resetting of flags between tests, loading of default_flags,
overriding of defaults. PerfZero (OSS) runs each test in a separate
process reducing some need to reset the flags.
"""
local_flags = None
def __init__(self,
output_dir=None,
default_flags=None,
root_data_dir=None,
flag_methods=None,
tpu=None):
"""Initialize class.
Args:
output_dir: Base directory to store all output for the test.
default_flags: Set of flags to pass to model.
root_data_dir: Optional param used by child classes to look for the
dataset.
flag_methods: Set of flag methods to run during setup.
tpu: (optional) TPU name to use in a TPU benchmark.
"""
if os.getenv('BENCHMARK_OUTPUT_DIR'):
self.output_dir = os.getenv('BENCHMARK_OUTPUT_DIR')
elif output_dir:
self.output_dir = output_dir
else:
self.output_dir = '/tmp'
self.default_flags = default_flags or {}
self.flag_methods = flag_methods or []
if os.getenv('BENCHMARK_TPU'):
resolved_tpu = os.getenv('BENCHMARK_TPU')
elif tpu:
resolved_tpu = tpu
else:
resolved_tpu = None
if resolved_tpu:
# TPU models are expected to accept a --tpu=name flag. PerfZero creates
# the TPU at runtime and passes the TPU's name to this flag.
self.default_flags['tpu'] = resolved_tpu
logging.info('root_data_dir: %s', root_data_dir)
@property
def tpu(self):
return self.default_flags.get('tpu', None)
def _get_model_dir(self, folder_name):
"""Returns directory to store info, e.g. saved model and event log."""
return os.path.join(self.output_dir, folder_name)
def _setup(self):
"""Sets up and resets flags before each test."""
logging.set_verbosity(logging.INFO)
if PerfZeroBenchmark.local_flags is None:
for flag_method in self.flag_methods:
flag_method()
# Parse flags once so their defaults are loaded and can then be overridden
# below; the argv list cannot be empty.
flags.FLAGS(['foo'])
# Overrides flag values with defaults for the class of tests.
for k, v in self.default_flags.items():
setattr(FLAGS, k, v)
saved_flag_values = flagsaver.save_flag_values()
PerfZeroBenchmark.local_flags = saved_flag_values
else:
flagsaver.restore_flag_values(PerfZeroBenchmark.local_flags)
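# A minimal subclassing sketch (illustrative only; `my_model`, its flag
# definitions, and the returned stats dict are hypothetical). The runner
# invokes `benchmark_*` methods, and `_setup()` re-applies `default_flags`
# before each one:
#
#   class MyModelBenchmark(PerfZeroBenchmark):
#
#     def __init__(self, output_dir=None, **kwargs):
#       super().__init__(
#           output_dir=output_dir,
#           default_flags={'batch_size': 64},
#           flag_methods=[my_model.define_flags],
#           **kwargs)
#
#     def benchmark_1_gpu(self):
#       self._setup()
#       FLAGS.num_gpus = 1
#       stats = my_model.run(FLAGS)
#       self.report_benchmark(iters=-1, wall_time=stats['wall_time_sec'])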
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnet50 Keras core benchmark."""
import tempfile
import time
import tensorflow as tf
import tensorflow_datasets as tfds
from official.benchmark import perfzero_benchmark
def _decode_and_center_crop(image_bytes):
"""Crops to center of image with padding then scales image_size."""
shape = tf.image.extract_jpeg_shape(image_bytes)
image_height, image_width, image_size = shape[0], shape[1], 224
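# Standard ResNet-style eval crop: the crop covers image_size / (image_size +
# 32) of the shorter side (224 / 256 = 87.5% here), centered, before resizing
# to image_size x image_size.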
padded_center_crop_size = tf.cast(
((image_size / (image_size + 32)) *
tf.cast(tf.minimum(image_height, image_width), tf.float32)),
tf.int32,
)
offset_height = ((image_height - padded_center_crop_size) + 1) // 2
offset_width = ((image_width - padded_center_crop_size) + 1) // 2
crop_window = tf.stack([
offset_height, offset_width, padded_center_crop_size,
padded_center_crop_size
])
image = tf.image.decode_and_crop_jpeg(image_bytes, crop_window, channels=3)
return tf.image.resize(image, [image_size, image_size], method="bicubic")
def _preprocessing(data):
return (
tf.cast(_decode_and_center_crop(data["image"]), tf.float32),
data["label"],
)
def _run_benchmark():
"""Runs a resnet50 compile/fit() call and returns the wall time."""
tmp_dir = tempfile.mkdtemp()
start_time = time.time()
batch_size = 64
dataset = tfds.load(
"imagenette",
decoders={"image": tfds.decode.SkipDecoding()},
split="train",
)
dataset = (
dataset.cache().repeat(
2
) # Artificially increase time per epoch to make it easier to measure
.map(_preprocessing,
num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(
batch_size).prefetch(1))
with tf.distribute.MirroredStrategy().scope():
model = tf.keras.applications.ResNet50(weights=None)
model.compile(
optimizer=tf.compat.v1.mixed_precision
.enable_mixed_precision_graph_rewrite(
tf.keras.optimizers.Adam(), loss_scale="dynamic"),
loss="sparse_categorical_crossentropy",
)
tb_cbk = tf.keras.callbacks.TensorBoard(
f"{tmp_dir}/{tf.__version__}", profile_batch=300)
model.fit(dataset, verbose=2, epochs=3, callbacks=[tb_cbk])
end_time = time.time()
return end_time - start_time
class Resnet50KerasCoreBenchmark(perfzero_benchmark.PerfZeroBenchmark):
"""Resnet50 Keras core benchmarks."""
def benchmark_1_gpu(self):
wall_time = _run_benchmark()
self.report_benchmark(iters=-1, wall_time=wall_time)
def benchmark_1_gpu_avg_3(self):
num_trials = 3
wall_times = []
for _ in range(num_trials):
wall_times.append(_run_benchmark())
avg_wall_time = sum(wall_times) / float(len(wall_times))
self.report_benchmark(iters=-1, wall_time=avg_wall_time)
def benchmark_1_gpu_max_3(self):
num_trials = 3
wall_times = []
for _ in range(num_trials):
wall_times.append(_run_benchmark())
max_wall_time = max(wall_times)
self.report_benchmark(iters=-1, wall_time=max_wall_time)
if __name__ == "__main__":
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes CTL benchmarks and accuracy tests."""
# pylint: disable=line-too-long,g-bad-import-order
from __future__ import print_function
import os # pylint: disable=unused-import
import time
from absl import flags
import tensorflow as tf
from official.benchmark import owner_utils
from official.legacy.image_classification.resnet import common
from official.legacy.image_classification.resnet import resnet_ctl_imagenet_main
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
from official.benchmark import benchmark_wrappers
from official.utils.flags import core as flags_core
IMAGENET_DEFAULT_DATA_PATH = 'gs://mlcompass-data/imagenet/imagenet-2012-tfrecord'
# TODO(emizan) Remove comment once you make sure that dataset caching has
# similar or better performance than the uncached local SSD dataset below.
# IMAGENET_EXP_DATA_PATH = 'gs://mlcompass-data/imagenet/imagenet-2012-tfrecord'
MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77
FLAGS = flags.FLAGS
class CtlBenchmark(PerfZeroBenchmark):
"""Base benchmark class with methods to simplify testing."""
def __init__(self,
output_dir=None,
default_flags=None,
flag_methods=None,
**kwargs):
self.default_flags = default_flags or {}
self.flag_methods = flag_methods or {}
super(CtlBenchmark, self).__init__(
output_dir=output_dir,
default_flags=self.default_flags,
flag_methods=self.flag_methods,
**kwargs)
def _report_benchmark(self,
stats,
wall_time_sec,
top_1_max=None,
top_1_min=None,
total_batch_size=None,
log_steps=None,
warmup=1,
start_time_sec=None):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from keras models with known entries.
wall_time_sec: the duration of the benchmark execution in seconds.
top_1_max: highest passing level for top_1 accuracy.
top_1_min: lowest passing level for top_1 accuracy.
total_batch_size: Global batch-size.
log_steps: How often the log was created for stats['step_timestamp_log'].
warmup: number of entries in stats['step_timestamp_log'] to ignore.
start_time_sec: the start time of the program in seconds since epoch.
"""
metrics = []
if 'eval_acc' in stats:
metrics.append({
'name': 'accuracy_top_1',
'value': stats['eval_acc'],
'min_value': top_1_min,
'max_value': top_1_max
})
metrics.append({'name': 'eval_loss', 'value': stats['eval_loss']})
metrics.append({
'name': 'top_1_train_accuracy',
'value': stats['train_acc']
})
metrics.append({'name': 'train_loss', 'value': stats['train_loss']})
if (warmup and 'step_timestamp_log' in stats and
len(stats['step_timestamp_log']) > warmup + 1):
# The first entry in the time_log marks the start of step 0; the remaining
# entries mark the end of each recorded step.
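# For example, with a global batch size of 1024 and 100 steps elapsed over
# 20 seconds, this reports 1024 * (100 / 20) = 5120 examples per second.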
time_log = stats['step_timestamp_log']
steps_elapsed = time_log[-1].batch_index - time_log[warmup].batch_index
time_elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
examples_per_sec = total_batch_size * (steps_elapsed / time_elapsed)
metrics.append({'name': 'exp_per_second', 'value': examples_per_sec})
if 'avg_exp_per_second' in stats:
metrics.append({
'name': 'avg_exp_per_second',
'value': stats['avg_exp_per_second']
})
if start_time_sec and 'step_timestamp_log' in stats:
time_log = stats['step_timestamp_log']
# time_log[0] is recorded at the beginning of the first step.
startup_time = time_log[0].timestamp - start_time_sec
metrics.append({'name': 'startup_time', 'value': startup_time})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=-1,
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class Resnet50CtlAccuracy(CtlBenchmark):
"""Benchmark accuracy tests for ResNet50 in CTL."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""A benchmark class.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more named
arguments before updating the constructor.
"""
flag_methods = [common.define_keras_flags]
self.data_dir = os.path.join(root_data_dir, 'imagenet')
super(Resnet50CtlAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def benchmark_8_gpu(self):
"""Test Keras model with eager, dist_strat and 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def benchmark_8_gpu_fp16(self):
"""Test Keras model with eager, 8 GPUs with tf.keras mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark()
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = resnet_ctl_imagenet_main.run(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
super(Resnet50CtlAccuracy, self)._report_benchmark(
stats,
wall_time_sec,
top_1_min=MIN_TOP_1_ACCURACY,
top_1_max=MAX_TOP_1_ACCURACY,
total_batch_size=FLAGS.batch_size,
log_steps=100,
start_time_sec=start_time_sec)
class Resnet50CtlBenchmarkBase(CtlBenchmark):
"""Resnet50 benchmarks."""
def __init__(self, output_dir=None, default_flags=None, **kwargs):
flag_methods = [common.define_keras_flags]
super(Resnet50CtlBenchmarkBase, self).__init__(
output_dir=output_dir,
flag_methods=flag_methods,
default_flags=default_flags,
**kwargs)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = resnet_ctl_imagenet_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
# `warmup` is the number of logged step-time entries excluded from the
# performance report; by default the first FLAGS.log_steps interval is
# excluded.
super(Resnet50CtlBenchmarkBase, self)._report_benchmark(
stats,
wall_time_sec,
total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
warmup=1,
start_time_sec=start_time_sec)
def benchmark_1_gpu_no_dist_strat(self):
"""Test Keras model with 1 GPU, no distribution strategy."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
"""Test Keras model with 1 GPU."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_1_gpu_fp16(self):
"""Test Keras model with 1 GPU with tf.keras mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark()
def benchmark_1_gpu_eager(self):
"""Test Keras model with 1 GPU in pure eager mode."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_eager')
FLAGS.batch_size = 120
FLAGS.use_tf_function = False
FLAGS.use_tf_while_loop = False
FLAGS.single_l2_loss_op = True
self._run_and_report_benchmark()
def benchmark_1_gpu_fp16_eager(self):
"""Test Keras model with 1 GPU with fp16 and pure eager mode."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_eager')
FLAGS.batch_size = 232
FLAGS.dtype = 'fp16'
FLAGS.use_tf_function = False
FLAGS.use_tf_while_loop = False
FLAGS.single_l2_loss_op = True
self._run_and_report_benchmark()
def benchmark_8_gpu(self):
"""Test Keras model with 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
FLAGS.batch_size = 128 * 8 # 8 GPUs
self._run_and_report_benchmark()
def benchmark_8_gpu_fp32_no_tf32(self):
"""Test Keras model with 8 GPUs.Runs in FP32 by disabling TF32 execution."""
self._setup()
tf.config.experimental.enable_tensor_float_32_execution(False)
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp32_no_tf32')
FLAGS.batch_size = 128 * 8 # 8 GPUs
self._run_and_report_benchmark()
def benchmark_8_gpu_fp16(self):
"""Test Keras model with 8 GPUs with tf.keras mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16(self):
"""Test Keras model with 8 GPUs with tf.keras mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.dtype = 'fp16'
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_8_gpu_eager(self):
"""Test Keras model with 8 GPUs, eager, fp32."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.use_tf_function = False
FLAGS.use_tf_while_loop = False
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_eager')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_8_gpu_eager_fp16(self):
"""Test Keras model with 8 GPUs, eager, fp16."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.use_tf_function = False
FLAGS.use_tf_while_loop = False
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_eager_fp16')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def _set_df_common(self):
FLAGS.steps_per_loop = 500
FLAGS.train_epochs = 2
FLAGS.train_steps = None
FLAGS.skip_eval = True
FLAGS.enable_eager = True
FLAGS.enable_tensorboard = False
FLAGS.distribution_strategy = 'tpu'
FLAGS.report_accuracy_metrics = False
FLAGS.log_steps = 50
FLAGS.single_l2_loss_op = True
FLAGS.use_tf_function = True
FLAGS.enable_checkpoint_and_export = False
FLAGS.data_dir = IMAGENET_DEFAULT_DATA_PATH
def benchmark_2x2_tpu_bf16(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 1024
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_bf16')
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_2x2_tpu_bf16_mlir(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 1024
FLAGS.dtype = 'bf16'
tf.config.experimental.enable_mlir_bridge()
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_bf16_mlir')
self._run_and_report_benchmark()
def benchmark_4x4_tpu_bf16(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.train_epochs = 4
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu_bf16')
FLAGS.data_dir = IMAGENET_DEFAULT_DATA_PATH
FLAGS.training_dataset_cache = True
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_4x4_tpu_bf16_mlir(self):
"""Run resnet model on 4x4 with the MLIR Bridge enabled."""
self._setup()
self._set_df_common()
FLAGS.batch_size = 4096
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu_bf16_mlir')
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark()
def benchmark_8x8_tpu_bf16(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu_bf16')
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_8x8_tpu_bf16_mlir(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu_bf16_mlir')
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark()
def benchmark_8x8_tpu(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu')
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_8x8_tpu_mlir(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu_mlir')
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark()
def benchmark_8x16_tpu_bf16(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_8x16_tpu_bf16')
self._run_and_report_benchmark()
def fill_report_object(self, stats):
super(Resnet50CtlBenchmarkBase, self).fill_report_object(
stats, total_batch_size=FLAGS.batch_size, log_steps=FLAGS.log_steps)
class Resnet50CtlBenchmarkSynth(Resnet50CtlBenchmarkBase):
"""Resnet50 synthetic benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['skip_eval'] = True
def_flags['use_synthetic_data'] = True
def_flags['train_steps'] = 110
def_flags['steps_per_loop'] = 10
def_flags['log_steps'] = 10
super(Resnet50CtlBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags, **kwargs)
class Resnet50CtlBenchmarkReal(Resnet50CtlBenchmarkBase):
"""Resnet50 real data benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['skip_eval'] = True
def_flags[
'data_dir'] = os.path.join(root_data_dir, 'imagenet')
def_flags['train_steps'] = 110
def_flags['steps_per_loop'] = 10
def_flags['log_steps'] = 10
super(Resnet50CtlBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=def_flags, **kwargs)
if __name__ == '__main__':
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes RetinaNet benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# pylint: disable=g-bad-import-order
import json
import time
from absl import flags
from absl.testing import flagsaver
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.benchmark import benchmark_wrappers
from official.benchmark import perfzero_benchmark
from official.legacy.detection import main as detection
from official.legacy.detection.configs import base_config
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
FLAGS = flags.FLAGS
# pylint: disable=line-too-long
COCO_TRAIN_DATA = 'gs://tf-perfzero-data/coco/train*'
COCO_EVAL_DATA = 'gs://tf-perfzero-data/coco/val*'
COCO_EVAL_JSON = 'gs://tf-perfzero-data/coco/instances_val2017.json'
RESNET_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07'
# pylint: enable=line-too-long
class BenchmarkBase(perfzero_benchmark.PerfZeroBenchmark):
"""Base class to hold methods common to test classes."""
def __init__(self, **kwargs):
super(BenchmarkBase, self).__init__(**kwargs)
self.timer_callback = None
def _report_benchmark(self, stats, start_time_sec, wall_time_sec, min_ap,
max_ap, warmup):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from Detection models with known entries.
start_time_sec: the start of the benchmark execution in seconds
wall_time_sec: the duration of the benchmark execution in seconds
min_ap: Minimum detection AP constraint to verify correctness of the
model.
max_ap: Maximum detection AP accuracy constraint to verify correctness of
the model.
warmup: Number of time log entries to ignore when computing examples/sec.
"""
metrics = [{
'name': 'total_loss',
'value': stats['total_loss'],
}]
if self.timer_callback:
metrics.append({
'name': 'exp_per_second',
'value': self.timer_callback.get_examples_per_sec(warmup)
})
metrics.append({
'name': 'startup_time',
'value': self.timer_callback.get_startup_time(start_time_sec)
})
else:
metrics.append({
'name': 'exp_per_second',
'value': 0.0,
})
if 'eval_metrics' in stats:
metrics.append({
'name': 'AP',
'value': stats['AP'],
'min_value': min_ap,
'max_value': max_ap,
})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=stats['total_steps'],
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class DetectionBenchmarkBase(BenchmarkBase):
"""Base class to hold methods common to test classes in the module."""
def __init__(self, **kwargs):
self.train_data_path = COCO_TRAIN_DATA
self.eval_data_path = COCO_EVAL_DATA
self.eval_json_path = COCO_EVAL_JSON
self.resnet_checkpoint_path = RESNET_CHECKPOINT_PATH
super(DetectionBenchmarkBase, self).__init__(**kwargs)
def _run_detection_main(self):
"""Starts detection job."""
if self.timer_callback:
FLAGS.log_steps = 0 # prevent detection.run from adding the same callback
return detection.run(callbacks=[self.timer_callback])
else:
return detection.run()
class DetectionAccuracy(DetectionBenchmarkBase):
"""Accuracy test for RetinaNet model.
Tests RetinaNet detection task model accuracy. The naming
convention of the test cases below follows the
`benchmark_(number of gpus)_gpu_(dataset type)` format.
"""
def __init__(self, model, per_gpu_batch_size=8, **kwargs):
self.model = model
self.per_gpu_batch_size = per_gpu_batch_size
super(DetectionAccuracy, self).__init__(**kwargs)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
params,
min_ap=0.325,
max_ap=0.35,
do_eval=True,
warmup=1):
"""Starts Detection accuracy benchmark test."""
FLAGS.params_override = json.dumps(params)
# Need timer callback to measure performance
self.timer_callback = keras_utils.TimeHistory(
batch_size=params['train']['batch_size'],
log_steps=FLAGS.log_steps,
)
start_time_sec = time.time()
FLAGS.mode = 'train'
summary, _ = self._run_detection_main()
wall_time_sec = time.time() - start_time_sec
if do_eval:
FLAGS.mode = 'eval'
eval_metrics = self._run_detection_main()
summary.update(eval_metrics)
summary['total_steps'] = params['train']['total_steps']
self._report_benchmark(summary, start_time_sec, wall_time_sec, min_ap,
max_ap, warmup)
def _setup(self):
super(DetectionAccuracy, self)._setup()
FLAGS.model = self.model
def _params(self):
return {
'architecture': {
'use_bfloat16': True,
},
'train': {
'batch_size': 64,
'iterations_per_loop': 100,
'total_steps': 22500,
'train_file_pattern': self.train_data_path,
'checkpoint': {
'path': self.resnet_checkpoint_path,
'prefix': 'resnet50/'
},
# Speed up ResNet training when loading from the checkpoint.
'frozen_variable_prefix': base_config.RESNET_FROZEN_VAR_PREFIX,
},
'eval': {
'batch_size': 8,
'eval_samples': 5000,
'val_json_file': self.eval_json_path,
'eval_file_pattern': self.eval_data_path,
},
}
@flagsaver.flagsaver
def benchmark_8_gpu_coco(self):
"""Run RetinaNet model accuracy test with 8 GPUs."""
self._setup()
params = self._params()
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_coco')
FLAGS.strategy_type = 'mirrored'
self._run_and_report_benchmark(params)
class DetectionBenchmarkReal(DetectionAccuracy):
"""Short benchmark performance tests for a detection model.
Tests detection performance in different accelerator configurations.
The naming convention of the test cases below follows the
`benchmark_(number of gpus)_gpu` format.
"""
def _setup(self):
super(DetectionBenchmarkReal, self)._setup()
# Use negative value to avoid saving checkpoints.
FLAGS.save_checkpoint_freq = -1
@flagsaver.flagsaver
def benchmark_8_gpu_coco(self):
"""Run detection model accuracy test with 8 GPUs."""
self._setup()
params = self._params()
params['architecture']['use_bfloat16'] = False
params['train']['total_steps'] = 1875 # One epoch.
params['train']['batch_size'] = 8 * self.per_gpu_batch_size
# iterations_per_loop must be one, otherwise the reported number of examples
# per second would be wrong. Callbacks are currently only invoked per batch
# when each host loop runs a single step, and performance in this mode may be
# lower than with iterations_per_loop > 1.
# Related bug: b/135933080
params['train']['iterations_per_loop'] = 1
params['eval']['eval_samples'] = 8
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('real_benchmark_8_gpu_coco')
FLAGS.strategy_type = 'mirrored'
self._run_and_report_benchmark(params)
@flagsaver.flagsaver
def benchmark_1_gpu_coco(self):
"""Run detection model accuracy test with 1 GPU."""
self._setup()
params = self._params()
params['architecture']['use_bfloat16'] = False
params['train']['batch_size'] = 1 * self.per_gpu_batch_size
params['train']['total_steps'] = 200
params['train']['iterations_per_loop'] = 1
params['eval']['eval_samples'] = 8
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('real_benchmark_1_gpu_coco')
FLAGS.strategy_type = 'one_device'
self._run_and_report_benchmark(params)
@flagsaver.flagsaver
def benchmark_xla_1_gpu_coco(self):
"""Run detection model accuracy test with 1 GPU and XLA enabled."""
self._setup()
params = self._params()
params['architecture']['use_bfloat16'] = False
params['train']['batch_size'] = 1 * self.per_gpu_batch_size
params['train']['total_steps'] = 200
params['train']['iterations_per_loop'] = 1
params['eval']['eval_samples'] = 8
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('real_benchmark_xla_1_gpu_coco')
FLAGS.strategy_type = 'one_device'
FLAGS.enable_xla = True
self._run_and_report_benchmark(params)
@flagsaver.flagsaver
def benchmark_2x2_tpu_coco(self):
"""Run detection model accuracy test with 4 TPUs."""
self._setup()
params = self._params()
params['train']['batch_size'] = 64
params['train']['total_steps'] = 1875 # One epoch.
params['train']['iterations_per_loop'] = 500
FLAGS.model_dir = self._get_model_dir('real_benchmark_2x2_tpu_coco')
FLAGS.strategy_type = 'tpu'
self._run_and_report_benchmark(params, do_eval=False, warmup=0)
@flagsaver.flagsaver
def benchmark_4x4_tpu_coco(self):
"""Run detection model accuracy test with 4x4 TPU."""
self._setup()
params = self._params()
params['train']['batch_size'] = 256
params['train']['total_steps'] = 10 * 469 # 10 epochs.
params['train']['iterations_per_loop'] = 500
FLAGS.model_dir = self._get_model_dir('real_benchmark_4x4_tpu_coco')
FLAGS.strategy_type = 'tpu'
self._run_and_report_benchmark(params, do_eval=False, warmup=1)
@flagsaver.flagsaver
def benchmark_2x2_tpu_coco_mlir(self):
"""Run detection model accuracy test with 4 TPUs."""
self._setup()
params = self._params()
params['train']['batch_size'] = 64
params['train']['total_steps'] = 1875 # One epoch.
params['train']['iterations_per_loop'] = 500
FLAGS.model_dir = self._get_model_dir('real_benchmark_2x2_tpu_coco_mlir')
FLAGS.strategy_type = 'tpu'
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark(params, do_eval=False, warmup=0)
@flagsaver.flagsaver
def benchmark_4x4_tpu_coco_mlir(self):
"""Run RetinaNet model accuracy test with 4 TPUs."""
self._setup()
params = self._params()
params['train']['batch_size'] = 256
params['train']['total_steps'] = 469 # One epoch.
params['train']['iterations_per_loop'] = 500
FLAGS.model_dir = self._get_model_dir('real_benchmark_4x4_tpu_coco_mlir')
FLAGS.strategy_type = 'tpu'
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark(params, do_eval=False, warmup=0)
@flagsaver.flagsaver
def benchmark_2x2_tpu_spinenet_coco(self):
"""Run detection model with SpineNet backbone accuracy test with 4 TPUs."""
self._setup()
params = self._params()
params['architecture']['backbone'] = 'spinenet'
params['architecture']['multilevel_features'] = 'identity'
params['architecture']['use_bfloat16'] = False
params['train']['batch_size'] = 64
params['train']['total_steps'] = 1875 # One epoch.
params['train']['iterations_per_loop'] = 500
params['train']['checkpoint']['path'] = ''
FLAGS.model_dir = self._get_model_dir(
'real_benchmark_2x2_tpu_spinenet_coco')
FLAGS.strategy_type = 'tpu'
self._run_and_report_benchmark(params, do_eval=False, warmup=0)
class RetinanetBenchmarkReal(DetectionBenchmarkReal):
"""Short benchmark performance tests for Retinanet model."""
def __init__(self, **kwargs):
super(RetinanetBenchmarkReal, self).__init__(model='retinanet',
per_gpu_batch_size=8,
**kwargs)
class MaskRCNNBenchmarkReal(DetectionBenchmarkReal):
"""Short benchmark performance tests for Mask RCNN model."""
def __init__(self, **kwargs):
super(MaskRCNNBenchmarkReal, self).__init__(model='mask_rcnn',
per_gpu_batch_size=4,
**kwargs)
class ShapeMaskBenchmarkReal(DetectionBenchmarkReal):
"""Short benchmark performance tests for ShapeMask model."""
def __init__(self, **kwargs):
super(ShapeMaskBenchmarkReal, self).__init__(model='shapemask',
per_gpu_batch_size=4,
**kwargs)
if __name__ == '__main__':
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Shakespeare (LSTM) benchmark and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
from absl import flags
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.benchmark.models.shakespeare import shakespeare_main
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
from official.benchmark import benchmark_wrappers
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
SHAKESPEARE_TRAIN_DATA = 'shakespeare/shakespeare.txt'
TMP_DIR = os.getenv('TMPDIR')
FLAGS = flags.FLAGS
class ShakespeareBenchmarkBase(PerfZeroBenchmark):
"""Base class for Shakespeare (LSTM) benchmark and accuracy tests."""
def __init__(self, output_dir=None, default_flags=None, root_data_dir=None):
super(ShakespeareBenchmarkBase, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
flag_methods=[shakespeare_main.define_flags])
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
top_1_train_min=0.91,
top_1_train_max=0.94,
warmup=1,
log_steps=100):
"""Report benchmark results by writing to local protobuf file.
Average epoch time is calculated by skipping the first epoch. This average
ignores time spent between epochs, since it is computed from the begin and
end times of each epoch. To skip the accuracy check, set
`top_1_train_min=None`.
Args:
top_1_train_min: lowest passing value.
top_1_train_max: highest passing value.
warmup: number of entries in `timestamp_log` to ignore.
log_steps: How often the log was created for `timestamp_log`.
"""
total_batch_size = FLAGS.batch_size
metrics = []
start_time_sec = time.time()
stats = shakespeare_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
if top_1_train_min:
metrics.append({
'name': 'accuracy_top_1_train',
'value': stats['history']['RecallAt1'][-1],
'min_value': top_1_train_min,
'max_value': top_1_train_max
})
# Look for the time history callback which was used during keras.fit
for callback in stats['callbacks']:
if isinstance(callback, keras_utils.TimeHistory):
epoch_timings = callback.epoch_runtime_log
if len(epoch_timings) > 1:
average_time = sum(epoch_timings[1:]) / len(epoch_timings[1:])
metrics.append({'name': 'avg_epoch_time', 'value': average_time})
# First entry in timestamp_log is the start of step 1. The rest of the
# entries are the end of each step recorded.
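# For example, with batch_size 64, log_steps 100, 11 logged timestamps and
# warmup 1, num_examples = 64 * 100 * (11 - 1 - 1) = 57600.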
time_log = callback.timestamp_log
elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
num_examples = (
total_batch_size * log_steps * (len(time_log) - warmup - 1))
if elapsed > 0:
examples_per_sec = num_examples / elapsed
metrics.append({'name': 'exp_per_second', 'value': examples_per_sec})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=-1,
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class ShakespeareAccuracy(ShakespeareBenchmarkBase):
"""Shakespeare accuracy tests.
This is not an ideal test. The best we can use for the accuracy check is to
validate top_1 of the training set. At batch size 64 the top_1 training
stabilizes to ~0.92 around 40-45 epochs.
"""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""Shakespeare accuracy tests.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more named
arguments before updating the constructor.
"""
self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)
super(ShakespeareAccuracy, self).__init__(
output_dir=output_dir, root_data_dir=root_data_dir)
def benchmark_cpu(self):
"""Benchmark cpu."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
self._run_and_report_benchmark()
def benchmark_cpu_no_ds_run_eagerly(self):
"""Benchmark cpu without distribution strategies and run eagerly."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
"""Benchmark 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds(self):
"""Benchmark 1 gpu without distribution strategies."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_run_eagerly(self):
"""Benchmark 1 gpu without distribution strategies and run eagerly."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_xla_1_gpu(self):
"""Benchmark 1 gpu w/xla."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_8_gpu(self):
"""Benchmark 8 gpu.
This test is for accuracy, not scaling. The batch size is not scaled to
the number of GPUs.
"""
self._setup()
FLAGS.num_gpus = 8
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
self._run_and_report_benchmark()
class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
"""Benchmark accuracy tests."""
def __init__(self, output_dir=None, root_data_dir=TMP_DIR, **kwargs):
"""Benchmark tests w/Keras.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more named
arguments before updating the constructor.
"""
self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)
def_flags = {}
def_flags['training_data'] = self.train_data
def_flags['model_dir'] = ''
def_flags['train_epochs'] = 4
def_flags['log_steps'] = 50
super(ShakespeareKerasBenchmarkReal, self).__init__(
output_dir=output_dir,
root_data_dir=root_data_dir,
default_flags=def_flags)
def benchmark_cpu(self):
"""Benchmark cpu."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.batch_size = 64
self._run_and_report_benchmark()
def benchmark_cpu_no_ds_run_eagerly(self):
"""Benchmark cpu without distribution strategy and run eagerly."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.batch_size = 64
FLAGS.distribution_strategy = 'off'
FLAGS.run_eagerly = True
self._run_and_report_benchmark()
def benchmark_cpu_no_ds(self):
"""Benchmark cpu without distribution strategy."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.batch_size = 64
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_cpu_no_ds_force_v2(self):
"""Benchmark cpu no ds, and force v2."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.batch_size = 64
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
"""Benchmark 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
self._run_and_report_benchmark()
def benchmark_1_gpu_no_cudnn(self):
"""Benchmark 1 gpu with CuDNN disabled."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.cudnn = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds(self):
"""Benchmark 1 gpu without distribution strategies."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_run_eagerly(self):
"""Benchmark 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_xla_1_gpu(self):
"""Benchmark 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_no_cudnn(self):
"""Benchmark 1 gpu w/xla and CuDNN disabled."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.cudnn = False
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_8_gpu(self):
"""Benchmark 8 gpu."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = 64 * 8
FLAGS.log_steps = 10
self._run_and_report_benchmark()
def benchmark_8_gpu_no_cudnn(self):
"""Benchmark 8 gpu with CuDNN disabled."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = 64 * 8
FLAGS.log_steps = 10
FLAGS.cudnn = False
self._run_and_report_benchmark()
def benchmark_xla_8_gpu(self):
"""Benchmark 8 gpu w/xla."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = 64 * 8
FLAGS.log_steps = 10
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_no_cudnn(self):
"""Benchmark 8 gpu w/xla and CuDNN disabled."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = 64 * 8
FLAGS.log_steps = 10
FLAGS.cudnn = False
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
"""Run and report benchmark."""
super(ShakespeareKerasBenchmarkReal, self)._run_and_report_benchmark(
top_1_train_min=None, log_steps=FLAGS.log_steps)
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to setup a tf scan e2e benchmark."""
import time
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from official.benchmark import perfzero_benchmark
# pylint: disable=invalid-name
# pylint: disable=no-value-for-parameter
# pylint: disable=unused-variable
def gen_batches(num_batches, batch_size, units):
for _ in range(num_batches):
x = np.random.random((batch_size, 20, units))
y = np.random.randint(1, units, size=(batch_size, 20))
yield x, y
class MyModel(tf.keras.models.Model):
"""Test model."""
def __init__(self, units):
super().__init__()
self._tf_layers = {}
self.units = units
self.transition_param = self.add_weight(
name="transition_param", shape=(units, units))
self.optimizer = tf.keras.optimizers.Adam()
self._training = False
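# The method below resembles a linear-chain CRF forward pass: tf.scan carries
# the running log-alphas across timesteps, and each step adds the emission
# scores to a log-sum-exp over the transition scores.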
def _loss_fn_with_scan(self, inputs, transition_params):
first_input = tf.slice(inputs, [0, 0, 0], [-1, 1, -1])
first_input = tf.squeeze(first_input, [1])
rest_of_input = tf.slice(inputs, [0, 1, 0], [-1, -1, -1])
rest_of_input = tf.transpose(rest_of_input, [1, 0, 2])
transition_params = tf.expand_dims(transition_params, 0)
def _scan_fn(_state, _inputs):
_state = tf.expand_dims(_state, 2)
transition_scores = _state + transition_params
new_alphas = _inputs + tf.reduce_logsumexp(transition_scores, [1])
return new_alphas
all_alphas = tf.transpose(
tf.scan(_scan_fn, rest_of_input, first_input), [1, 0, 2])
# add first state for sequences of length 1
all_alphas = tf.concat([tf.expand_dims(first_input, 1), all_alphas], 1)
return all_alphas
def _loss(self, x, y):
logits = tf.cast(x, dtype=tf.float32)
loss = self._loss_fn_with_scan(logits, self.transition_param)
return tf.reduce_mean(loss)
@tf.function
def train_on_batch(self, *args):
with tf.GradientTape(persistent=True) as tape:
loss = self._loss(*args)
grads = tape.gradient(loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
return loss
def train(self, epochs, batch_size, num_batches):
data_generator_iter = gen_batches(num_batches, batch_size, self.units)
sample_x, sample_y = next(data_generator_iter)
self.train_on_batch(sample_x, sample_y)
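# Note: gen_batches returns a single finite generator, so the warm-up call
# above plus the first epoch below consume all of its batches; later epochs
# iterate over an exhausted iterator.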
self._training = True
progress_bar = tqdm(range(epochs), desc="Epochs")
for epoch in progress_bar:
for batch_x, batch_y in data_generator_iter:
loss = self.train_on_batch(batch_x, batch_y)
progress_bar.update(1)
progress_bar.set_postfix({"loss": f"{loss.numpy():.3f}"})
def _run_benchmark(model):
"""Runs the benchmark."""
np.random.seed(123)
num_batches = 5000
batch_size = 32
epochs = 100
start_time = time.time()
model.train(epochs, batch_size, num_batches)
end_time = time.time()
wall_time = end_time - start_time
return wall_time
class TfScanE2EBenchmark(perfzero_benchmark.PerfZeroBenchmark):
"""Scan E2E benchmark."""
def benchmark_cpu(self):
units = 64
model = MyModel(units)
wall_time = _run_benchmark(model)
self.report_benchmark(iters=-1, wall_time=wall_time)
def benchmark_cpu_avg_4(self):
units = 64
model = MyModel(units)
num_trials = 4
wall_times = []
for _ in range(num_trials):
wall_times.append(_run_benchmark(model))
avg_wall_time = sum(wall_times) / float(len(wall_times))
self.report_benchmark(iters=-1, wall_time=avg_wall_time)
if __name__ == "__main__":
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark TF-vision saved models on a TFRecord dataset."""
import time
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
FLAGS = flags.FLAGS
flags.DEFINE_string('saved_model_path', None, 'Path to saved model.')
flags.DEFINE_string('tf_examples_path', None, 'Path to TF examples.')
flags.DEFINE_integer('num_samples', 100, 'Number of samples.')
flags.DEFINE_integer('num_ignore_samples', 5,
('Number of initial samples to ignore. '
'The first few samples (usually 1) are used by '
'tensorflow to optimize the tf.function call'))
flags.mark_flag_as_required('saved_model_path')
flags.mark_flag_as_required('tf_examples_path')
flags.mark_flag_as_required('num_samples')
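# Example invocation (the script name and paths are illustrative
# placeholders; the flags are the ones defined above):
#   python3 saved_model_benchmark.py \
#     --saved_model_path=/path/to/saved_model \
#     --tf_examples_path='/path/to/examples*.tfrecord' \
#     --num_samples=100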
def main(_) -> None:
files = tf.data.Dataset.list_files(FLAGS.tf_examples_path)
logging.info('Found %d files.', len(files))
dataset = tf.data.TFRecordDataset(files)
model = tf.saved_model.load(FLAGS.saved_model_path)
detect_fn = model.signatures['serving_default']
time_taken = 0.0
for (i, sample) in enumerate(dataset.take(FLAGS.num_samples)):
example = tf.train.Example()
example.ParseFromString(sample.numpy())
image_encoded = example.features.feature['image/encoded']
image = tf.io.decode_image(image_encoded.bytes_list.value[0])
image = image[tf.newaxis]
start_time = time.time()
_ = detect_fn(image)
sample_time = time.time() - start_time
if (i % 10) == 0:
logging.info('Finished sample %d %.2f ms', i, sample_time * 1000.0)
if i < FLAGS.num_ignore_samples:
continue
time_taken += sample_time
num_benchmark_samples = FLAGS.num_samples - FLAGS.num_ignore_samples
logging.info('Per-sample time for {} samples = {:.2f}ms'.format(
num_benchmark_samples, 1000.0 * time_taken / num_benchmark_samples))
if __name__ == '__main__':
app.run(main)
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a memory usage benchmark for a Tensorflow Hub model.
Loads a SavedModel and records memory usage.
"""
import functools
import time
from absl import flags
import tensorflow as tf
import tensorflow_hub as hub
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
FLAGS = flags.FLAGS
class TfHubMemoryUsageBenchmark(PerfZeroBenchmark):
"""A benchmark measuring memory usage for a given TF Hub SavedModel."""
def __init__(self,
hub_model_handle_list=None,
output_dir=None,
default_flags=None,
root_data_dir=None,
**kwargs):
super(TfHubMemoryUsageBenchmark, self).__init__(
output_dir=output_dir, default_flags=default_flags, **kwargs)
if hub_model_handle_list:
for hub_model_handle in hub_model_handle_list.split(';'):
# Converts a model handle of the form
# https://tfhub.dev/google/nnlm-en-dim128/1 to valid python method name
# like google_nnlm_en_dim128_1.
hub_model_method_name = hub_model_handle.replace(
'https://tfhub.dev', '').replace('/', '_').replace('-',
'_').strip('_')
setattr(
self, 'benchmark_' + hub_model_method_name,
functools.partial(self.benchmark_memory_usage, hub_model_handle))
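# For example (illustrative only), constructing the benchmark with
# hub_model_handle_list='https://tfhub.dev/google/nnlm-en-dim128/1'
# dynamically adds a `benchmark_google_nnlm_en_dim128_1` method that loads
# that handle and reports the load wall time.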
def benchmark_memory_usage(
self, hub_model_handle='https://tfhub.dev/google/nnlm-en-dim128/1'):
start_time_sec = time.time()
self.load_model(hub_model_handle)
wall_time_sec = time.time() - start_time_sec
metrics = []
self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)
def load_model(self, hub_model_handle):
"""Loads a TF Hub module."""
hub.load(hub_model_handle)
if __name__ == '__main__':
tf.test.main()