Commit 47bc1813 authored by syiming

Merge remote-tracking branch 'upstream/master' into add_multilevel_crop_and_resize

Parents: d8611151, b035a227
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import tensorflow.compat.v1 as tf # pylint: disable=g-bad-import-order
from absl import logging
from official.r1.mnist import mnist
BATCH_SIZE = 100
def dummy_input_fn():
image = tf.random.uniform([BATCH_SIZE, 784])
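# Note: maxval is exclusive, so these dummy labels are drawn from [0, 9).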
labels = tf.random.uniform([BATCH_SIZE, 1], maxval=9, dtype=tf.int32)
return image, labels
def make_estimator():
data_format = 'channels_last'
if tf.test.is_built_with_cuda():
data_format = 'channels_first'
return tf.estimator.Estimator(
model_fn=mnist.model_fn, params={
'data_format': data_format
})
class Tests(tf.test.TestCase):
"""Run tests for MNIST model.
MNIST uses contrib and will not work with TF 2.0. All tests are disabled if
using TF 2.0.
"""
def test_mnist(self):
classifier = make_estimator()
classifier.train(input_fn=dummy_input_fn, steps=2)
eval_results = classifier.evaluate(input_fn=dummy_input_fn, steps=1)
loss = eval_results['loss']
global_step = eval_results['global_step']
accuracy = eval_results['accuracy']
self.assertEqual(loss.shape, ())
self.assertEqual(2, global_step)
self.assertEqual(accuracy.shape, ())
input_fn = lambda: tf.random.uniform([3, 784])
predictions_generator = classifier.predict(input_fn)
for _ in range(3):
predictions = next(predictions_generator)
self.assertEqual(predictions['probabilities'].shape, (10,))
self.assertEqual(predictions['classes'].shape, ())
def mnist_model_fn_helper(self, mode, multi_gpu=False):
features, labels = dummy_input_fn()
image_count = features.shape[0]
spec = mnist.model_fn(features, labels, mode, {
'data_format': 'channels_last',
'multi_gpu': multi_gpu
})
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = spec.predictions
self.assertAllEqual(predictions['probabilities'].shape, (image_count, 10))
self.assertEqual(predictions['probabilities'].dtype, tf.float32)
self.assertAllEqual(predictions['classes'].shape, (image_count,))
self.assertEqual(predictions['classes'].dtype, tf.int64)
if mode != tf.estimator.ModeKeys.PREDICT:
loss = spec.loss
self.assertAllEqual(loss.shape, ())
self.assertEqual(loss.dtype, tf.float32)
if mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = spec.eval_metric_ops
self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
def test_mnist_model_fn_train_mode(self):
self.mnist_model_fn_helper(tf.estimator.ModeKeys.TRAIN)
def test_mnist_model_fn_train_mode_multi_gpu(self):
self.mnist_model_fn_helper(tf.estimator.ModeKeys.TRAIN, multi_gpu=True)
def test_mnist_model_fn_eval_mode(self):
self.mnist_model_fn_helper(tf.estimator.ModeKeys.EVAL)
def test_mnist_model_fn_predict_mode(self):
self.mnist_model_fn_helper(tf.estimator.ModeKeys.PREDICT)
class Benchmarks(tf.test.Benchmark):
"""Simple speed benchmarking for MNIST."""
def benchmark_train_step_time(self):
classifier = make_estimator()
# Run one step to warmup any use of the GPU.
classifier.train(input_fn=dummy_input_fn, steps=1)
have_gpu = tf.test.is_gpu_available()
num_steps = 1000 if have_gpu else 100
name = 'train_step_time_%s' % ('gpu' if have_gpu else 'cpu')
start = time.time()
classifier.train(input_fn=dummy_input_fn, steps=num_steps)
end = time.time()
wall_time = (end - start) / num_steps
self.report_benchmark(
iters=num_steps,
wall_time=wall_time,
name=name,
extras={
'examples_per_sec': BATCH_SIZE / wall_time
})
if __name__ == '__main__':
logging.set_verbosity(logging.ERROR)
tf.disable_v2_behavior()
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""MNIST model training using TPUs.
This program demonstrates training of the convolutional neural network model
defined in mnist.py on Google Cloud TPUs (https://cloud.google.com/tpu/).
If you are not interested in TPUs, you should ignore this file.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
# pylint: disable=g-bad-import-order
from absl import app as absl_app # pylint: disable=unused-import
import tensorflow.compat.v1 as tf
# pylint: enable=g-bad-import-order
# For open source environment, add grandparent directory for import
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(sys.path[0]))))
from official.r1.mnist import dataset # pylint: disable=wrong-import-position
from official.r1.mnist import mnist # pylint: disable=wrong-import-position
# Cloud TPU Cluster Resolver flags
tf.flags.DEFINE_string(
"tpu", default=None,
help="The Cloud TPU to use for training. This should be either the name "
"used when creating the Cloud TPU, or a grpc://ip.address.of.tpu:8470 "
"url.")
tf.flags.DEFINE_string(
"tpu_zone", default=None,
help="[Optional] GCE zone where the Cloud TPU is located in. If not "
"specified, we will attempt to automatically detect the GCE project from "
"metadata.")
tf.flags.DEFINE_string(
"gcp_project", default=None,
help="[Optional] Project name for the Cloud TPU-enabled project. If not "
"specified, we will attempt to automatically detect the GCE project from "
"metadata.")
# Model specific parameters
tf.flags.DEFINE_string("data_dir", "",
"Path to directory containing the MNIST dataset")
tf.flags.DEFINE_string("model_dir", None, "Estimator model_dir")
tf.flags.DEFINE_integer("batch_size", 1024,
"Mini-batch size for the training. Note that this "
"is the global batch size and not the per-shard batch.")
tf.flags.DEFINE_integer("train_steps", 1000, "Total number of training steps.")
tf.flags.DEFINE_integer("eval_steps", 0,
"Total number of evaluation steps. If `0`, evaluation "
"after training is skipped.")
tf.flags.DEFINE_float("learning_rate", 0.05, "Learning rate.")
tf.flags.DEFINE_bool("use_tpu", True, "Use TPUs rather than plain CPUs")
tf.flags.DEFINE_bool("enable_predict", True, "Do some predictions at the end")
tf.flags.DEFINE_integer("iterations", 50,
"Number of iterations per TPU training loop.")
tf.flags.DEFINE_integer("num_shards", 8, "Number of shards (TPU chips).")
FLAGS = tf.flags.FLAGS
def metric_fn(labels, logits):
accuracy = tf.metrics.accuracy(
labels=labels, predictions=tf.argmax(logits, axis=1))
return {"accuracy": accuracy}
def model_fn(features, labels, mode, params):
"""model_fn constructs the ML model used to predict handwritten digits."""
del params
image = features
if isinstance(image, dict):
image = features["image"]
model = mnist.create_model("channels_last")
if mode == tf.estimator.ModeKeys.PREDICT:
logits = model(image, training=False)
predictions = {
'class_ids': tf.argmax(logits, axis=1),
'probabilities': tf.nn.softmax(logits),
}
return tf.estimator.tpu.TPUEstimatorSpec(mode, predictions=predictions)
logits = model(image, training=(mode == tf.estimator.ModeKeys.TRAIN))
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
if mode == tf.estimator.ModeKeys.TRAIN:
learning_rate = tf.train.exponential_decay(
FLAGS.learning_rate,
tf.train.get_global_step(),
decay_steps=100000,
decay_rate=0.96)
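# Non-staircase decay: lr = learning_rate * 0.96**(global_step / 100000), so
# with the default --train_steps=1000 the rate stays essentially constant.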
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
if FLAGS.use_tpu:
optimizer = tf.tpu.CrossShardOptimizer(optimizer)
return tf.estimator.tpu.TPUEstimatorSpec(
mode=mode,
loss=loss,
train_op=optimizer.minimize(loss, tf.train.get_global_step()))
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.tpu.TPUEstimatorSpec(
mode=mode, loss=loss, eval_metrics=(metric_fn, [labels, logits]))
def train_input_fn(params):
"""train_input_fn defines the input pipeline used for training."""
batch_size = params["batch_size"]
data_dir = params["data_dir"]
# Retrieves the batch size for the current shard. The # of shards is
# computed according to the input pipeline deployment. See
# `tf.estimator.tpu.RunConfig` for details.
ds = dataset.train(data_dir).cache().repeat().shuffle(
buffer_size=50000).batch(batch_size, drop_remainder=True)
return ds
def eval_input_fn(params):
batch_size = params["batch_size"]
data_dir = params["data_dir"]
ds = dataset.test(data_dir).batch(batch_size, drop_remainder=True)
return ds
def predict_input_fn(params):
batch_size = params["batch_size"]
data_dir = params["data_dir"]
# Take the first 10 samples from the test data to make predictions.
ds = dataset.test(data_dir).take(10).batch(batch_size)
return ds
def main(argv):
del argv # Unused.
tf.logging.set_verbosity(tf.logging.INFO)
tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
FLAGS.tpu,
zone=FLAGS.tpu_zone,
project=FLAGS.gcp_project
)
run_config = tf.estimator.tpu.RunConfig(
cluster=tpu_cluster_resolver,
model_dir=FLAGS.model_dir,
session_config=tf.ConfigProto(
allow_soft_placement=True, log_device_placement=True),
tpu_config=tf.estimator.tpu.TPUConfig(FLAGS.iterations, FLAGS.num_shards),
)
estimator = tf.estimator.tpu.TPUEstimator(
model_fn=model_fn,
use_tpu=FLAGS.use_tpu,
train_batch_size=FLAGS.batch_size,
eval_batch_size=FLAGS.batch_size,
predict_batch_size=FLAGS.batch_size,
params={"data_dir": FLAGS.data_dir},
config=run_config)
# TPUEstimator.train *requires* a max_steps argument.
estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
# TPUEstimator.evaluate *requires* a steps argument.
# Note that the number of examples used during evaluation is
# --eval_steps * --batch_size.
# So if you change --batch_size then change --eval_steps too.
if FLAGS.eval_steps:
estimator.evaluate(input_fn=eval_input_fn, steps=FLAGS.eval_steps)
# Run prediction on top few samples of test data.
if FLAGS.enable_predict:
predictions = estimator.predict(input_fn=predict_input_fn)
for pred_dict in predictions:
template = ('Prediction is "{}" ({:.1f}%).')
class_id = pred_dict['class_ids']
probability = pred_dict['probabilities'][class_id]
print(template.format(class_id, 100 * probability))
if __name__ == "__main__":
tf.disable_v2_behavior()
absl_app.run(main)
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# NCF Estimator implementation
NCF framework to train and evaluate the NeuMF model
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""NCF framework to train and evaluate the NeuMF model.
The NeuMF model assembles both MF and MLP models under the NCF framework. Check
`neumf_model.py` for more details about the models.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import heapq
import json
import math
import multiprocessing
import os
import signal
from absl import app as absl_app
from absl import flags
from absl import logging
import numpy as np
from six.moves import range
import tensorflow as tf
import typing
from official.r1.utils.logs import hooks_helper
from official.r1.utils.logs import logger
from official.r1.utils.logs import mlperf_helper
from official.recommendation import constants as rconst
from official.recommendation import data_pipeline
from official.recommendation import data_preprocessing
from official.recommendation import movielens
from official.recommendation import ncf_common
from official.recommendation import neumf_model
from official.utils.flags import core as flags_core
from official.utils.misc import distribution_utils
from official.utils.misc import model_helpers
FLAGS = flags.FLAGS
def construct_estimator(model_dir, params):
"""Construct either an Estimator for NCF.
Args:
model_dir: The model directory for the estimator
params: The params dict for the estimator
Returns:
An Estimator.
"""
distribution = ncf_common.get_v1_distribution_strategy(params)
run_config = tf.estimator.RunConfig(train_distribute=distribution,
eval_distribute=distribution)
model_fn = neumf_model.neumf_model_fn
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir,
config=run_config, params=params)
return estimator
def log_and_get_hooks(eval_batch_size):
"""Convenience function for hook and logger creation."""
# Create hooks that log information about the training and metric values
train_hooks = hooks_helper.get_train_hooks(
FLAGS.hooks,
model_dir=FLAGS.model_dir,
batch_size=FLAGS.batch_size, # for ExamplesPerSecondHook
tensors_to_log={"cross_entropy": "cross_entropy"}
)
run_params = {
"batch_size": FLAGS.batch_size,
"eval_batch_size": eval_batch_size,
"number_factors": FLAGS.num_factors,
"hr_threshold": FLAGS.hr_threshold,
"train_epochs": FLAGS.train_epochs,
}
benchmark_logger = logger.get_benchmark_logger()
benchmark_logger.log_run_info(
model_name="recommendation",
dataset_name=FLAGS.dataset,
run_params=run_params,
test_id=FLAGS.benchmark_test_id)
return benchmark_logger, train_hooks
def main(_):
with logger.benchmark_context(FLAGS), \
mlperf_helper.LOGGER(FLAGS.output_ml_perf_compliance_logging):
mlperf_helper.set_ncf_root(os.path.split(os.path.abspath(__file__))[0])
run_ncf(FLAGS)
def run_ncf(_):
"""Run NCF training and eval loop."""
params = ncf_common.parse_flags(FLAGS)
num_users, num_items, num_train_steps, num_eval_steps, producer = (
ncf_common.get_inputs(params))
params["num_users"], params["num_items"] = num_users, num_items
producer.start()
model_helpers.apply_clean(flags.FLAGS)
estimator = construct_estimator(model_dir=FLAGS.model_dir, params=params)
benchmark_logger, train_hooks = log_and_get_hooks(params["eval_batch_size"])
total_training_cycle = FLAGS.train_epochs // FLAGS.epochs_between_evals
target_reached = False
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.TRAIN_LOOP)
for cycle_index in range(total_training_cycle):
assert FLAGS.epochs_between_evals == 1 or not mlperf_helper.LOGGER.enabled
logging.info("Starting a training cycle: {}/{}".format(
cycle_index + 1, total_training_cycle))
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.TRAIN_EPOCH,
value=cycle_index)
train_input_fn = producer.make_input_fn(is_training=True)
estimator.train(input_fn=train_input_fn, hooks=train_hooks,
steps=num_train_steps)
logging.info("Beginning evaluation.")
eval_input_fn = producer.make_input_fn(is_training=False)
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_START,
value=cycle_index)
eval_results = estimator.evaluate(eval_input_fn, steps=num_eval_steps)
logging.info("Evaluation complete.")
hr = float(eval_results[rconst.HR_KEY])
ndcg = float(eval_results[rconst.NDCG_KEY])
loss = float(eval_results["loss"])
mlperf_helper.ncf_print(
key=mlperf_helper.TAGS.EVAL_TARGET,
value={"epoch": cycle_index, "value": FLAGS.hr_threshold})
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_ACCURACY,
value={"epoch": cycle_index, "value": hr})
mlperf_helper.ncf_print(
key=mlperf_helper.TAGS.EVAL_HP_NUM_NEG,
value={"epoch": cycle_index, "value": rconst.NUM_EVAL_NEGATIVES})
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_STOP, value=cycle_index)
# Benchmark the evaluation results
benchmark_logger.log_evaluation_result(eval_results)
# Log the HR and NDCG results.
logging.info(
"Iteration {}: HR = {:.4f}, NDCG = {:.4f}, Loss = {:.4f}".format(
cycle_index + 1, hr, ndcg, loss))
# Stop training early if the evaluation threshold has been met.
if model_helpers.past_stop_threshold(FLAGS.hr_threshold, hr):
target_reached = True
break
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.RUN_STOP,
value={"success": target_reached})
producer.stop_loop()
producer.join()
# Clear the session explicitly to avoid session delete error
tf.keras.backend.clear_session()
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.RUN_FINAL)
if __name__ == "__main__":
logging.set_verbosity(logging.INFO)
ncf_common.define_ncf_flags()
absl_app.run(main)
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# ResNet in TensorFlow
Deep residual networks, or ResNets for short, provided the breakthrough idea of
identity mappings in order to enable training of very deep convolutional neural
networks. This folder contains an implementation of ResNet for the ImageNet
dataset written in TensorFlow.
See the following papers for more background:
[1] [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.
[2] [Identity Mappings in Deep Residual Networks](https://arxiv.org/pdf/1603.05027.pdf) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.
In code, v1 refers to the ResNet defined in [1], except that a stride of 2 is
used on the 3x3 convolution rather than on the first 1x1 convolution in the
bottleneck block. This change results in higher and more stable accuracy with
fewer epochs than the original v1, and it has been shown to scale to larger
batch sizes with minimal degradation in accuracy. There is no originating
paper; the earliest mention we are aware of is in the Torch version of
[ResNetv1](https://github.com/facebook/fb.resnet.torch). Most popular "v1"
implementations are actually this variant, which we call ResNet v1.5. In
testing, we found that v1.5 requires ~12% more compute to train and has ~6%
lower inference throughput than the original v1. The CIFAR-10 ResNet does not
use the bottleneck block and is therefore identical for v1 and v1.5.
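To make the stride placement concrete, here is a minimal illustrative sketch (not the code in this repository; batch normalization, activations, and the projection shortcut are omitted, and the function name is ours):

```python
import tensorflow as tf

def bottleneck_convs(inputs, filters, stride, version='v1.5'):
  """Sketch only: where the stride-2 convolution sits in a bottleneck block."""
  conv = tf.keras.layers.Conv2D
  if version == 'v1':
    # Original v1: the first 1x1 convolution carries the stride.
    x = conv(filters, 1, strides=stride, padding='same')(inputs)
    x = conv(filters, 3, strides=1, padding='same')(x)
  else:
    # v1.5: the 3x3 convolution carries the stride instead.
    x = conv(filters, 1, strides=1, padding='same')(inputs)
    x = conv(filters, 3, strides=stride, padding='same')(x)
  return conv(4 * filters, 1, strides=1, padding='same')(x)
```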
v2 refers to [2]. The principal difference between the two versions is that v1
applies batch normalization and activation after each convolution, while v2
applies batch normalization, then activation, and finally convolution (a
"pre-activation" ordering). A schematic comparison is presented in Figure 1
(left) of [2].
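Illustratively again (shortcut connections omitted), the two orderings differ as follows:

```python
import tensorflow as tf

def conv_unit(inputs, filters, resnet_version):
  """Sketch only: the op ordering that distinguishes v1 from v2."""
  bn = tf.keras.layers.BatchNormalization()
  conv = tf.keras.layers.Conv2D(filters, 3, padding='same')
  relu = tf.keras.layers.ReLU()
  if resnet_version == 1:
    # v1: convolution, then batch normalization, then activation.
    return relu(bn(conv(inputs)))
  # v2: batch normalization, then activation, then convolution
  # (the "pre-activation" ordering).
  return conv(relu(bn(inputs)))
```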
Please proceed according to which dataset you would like to train/evaluate on:
## CIFAR-10
### Setup
You need to have the latest version of TensorFlow installed.
First, make sure [the models folder is in your Python path](/official/#running-the-models); otherwise you may encounter `ImportError: No module named official.resnet`.
Then, download and extract the CIFAR-10 data from Alex Krizhevsky's website, specifying the location with the `--data_dir` flag. Run the following:
```bash
python cifar10_download_and_extract.py --data_dir <DATA_DIR>
```
Then, to train the model:
```bash
python cifar10_main.py --data_dir <DATA_DIR>/cifar-10-batches-bin --model_dir <MODEL_DIR>
```
Use `--data_dir` to specify the location of the CIFAR-10 data used in the previous step. There are more flag options as described in `cifar10_main.py`.
To export a `SavedModel` from the trained checkpoint:
```bash
python cifar10_main.py --data_dir <DATA_DIR>/cifar-10-batches-bin --model_dir <MODEL_DIR> --eval_only --export_dir <EXPORT_DIR>
```
Note: `<EXPORT_DIR>` must already exist; you may want to run `mkdir <EXPORT_DIR>` beforehand.
The `SavedModel` can then be [loaded](https://www.tensorflow.org/guide/saved_model#loading_a_savedmodel_in_python) in order to use the ResNet for prediction.
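For example, here is a minimal sketch of running a prediction with the TF 1.x `tf.contrib.predictor` helper. The `'input'` key and the timestamped subdirectory are assumptions; inspect the export with the `saved_model_cli` tool to confirm the actual signature names.

```python
import numpy as np
import tensorflow as tf

# --export_dir creates a timestamped subdirectory; point at it here.
predict_fn = tf.contrib.predictor.from_saved_model('<EXPORT_DIR>/<timestamp>')

# A single fake CIFAR-10 image: batch of 1, 32x32 pixels, 3 channels.
image = np.zeros((1, 32, 32, 3), dtype=np.float32)

# The 'input' key is an assumption; check the SavedModel's signature names.
print(predict_fn({'input': image}))
```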
## ImageNet
### Setup
To begin, you will need to download the ImageNet dataset and convert it to
TFRecord format. The following [script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py)
and [README](https://github.com/tensorflow/tpu/tree/master/tools/datasets#imagenet_to_gcspy)
provide a few options.
Once your dataset is ready, you can begin training the model as follows:
```bash
python imagenet_main.py --data_dir=/path/to/imagenet
```
The model will begin training and will automatically evaluate itself on the
validation data roughly once per epoch.
Note that there are a number of other options you can specify, including
`--model_dir` to choose where to store the model and `--resnet_size` to choose
the model size (options include ResNet-18 through ResNet-200). See
[`resnet_run_loop.py`](resnet_run_loop.py) for the full list of options.
## Compute Devices
Training is accomplished using the DistributionStrategies API. (https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/distribute/README.md)
The appropriate distribution strategy is chosen based on the `--num_gpus` flag.
By default this flag is one if TensorFlow is compiled with CUDA, and zero
otherwise.
num_gpus:
+ 0: Use OneDeviceStrategy and train on CPU.
+ 1: Use OneDeviceStrategy and train on GPU.
+ 2+: Use MirroredStrategy (data parallelism) to distribute a batch between devices.
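A rough sketch of that selection rule (illustrative only; the actual logic lives in the repository's distribution utilities and handles more cases):

```python
import tensorflow as tf  # TensorFlow 1.x

def choose_strategy(num_gpus):
  """Sketch of the --num_gpus selection rule described above."""
  if num_gpus == 0:
    return tf.distribute.OneDeviceStrategy('/device:CPU:0')
  if num_gpus == 1:
    return tf.distribute.OneDeviceStrategy('/device:GPU:0')
  return tf.distribute.MirroredStrategy(
      devices=['/device:GPU:%d' % i for i in range(num_gpus)])
```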
### Pre-trained model
You can download pre-trained versions of ResNet-50. Reported accuracies are top-1 single-crop accuracy for the ImageNet validation set.
Models are provided both as checkpoints produced by Estimator during training and as SavedModels, which are more portable. Checkpoints are fragile
and are not guaranteed to work with future versions of the code. Both ResNet v1
and ResNet v2 have been trained in both fp16 and fp32 precision. (Here v1 refers to "v1.5"; see the note above.) Furthermore, SavedModels
are generated to accept either tensor or JPG inputs, with either channels_first (NCHW) or channels_last (NHWC) convolutions. NCHW is generally
better for GPUs, while NHWC is generally better for CPUs. See the TensorFlow [performance guide](https://www.tensorflow.org/performance/performance_guide#data_formats)
for more details.
ResNet-50 v2 (fp32, Accuracy 76.47%):
* [Checkpoint](http://download.tensorflow.org/models/official/20181001_resnet/checkpoints/resnet_imagenet_v2_fp32_20181001.tar.gz)
* SavedModel [(NCHW)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp32_savedmodel_NCHW.tar.gz),
[(NCHW, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp32_savedmodel_NCHW_jpg.tar.gz),
[(NHWC)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp32_savedmodel_NHWC.tar.gz),
[(NHWC, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp32_savedmodel_NHWC_jpg.tar.gz)
ResNet-50 v2 (fp16, Accuracy 76.56%):
* [Checkpoint](http://download.tensorflow.org/models/official/20181001_resnet/checkpoints/resnet_imagenet_v2_fp16_20180928.tar.gz)
* SavedModel [(NCHW)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp16_savedmodel_NCHW.tar.gz),
[(NCHW, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp16_savedmodel_NCHW_jpg.tar.gz),
[(NHWC)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp16_savedmodel_NHWC.tar.gz),
[(NHWC, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp16_savedmodel_NHWC_jpg.tar.gz)
ResNet-50 v1 (fp32, Accuracy 76.53%):
* [Checkpoint](http://download.tensorflow.org/models/official/20181001_resnet/checkpoints/resnet_imagenet_v1_fp32_20181001.tar.gz)
* SavedModel [(NCHW)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW.tar.gz),
[(NCHW, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW_jpg.tar.gz),
[(NHWC)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NHWC.tar.gz),
[(NHWC, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NHWC_jpg.tar.gz)
ResNet-50 v1 (fp16, Accuracy 76.18%):
* [Checkpoint](http://download.tensorflow.org/models/official/20181001_resnet/checkpoints/resnet_imagenet_v1_fp16_20181001.tar.gz)
* SavedModel [(NCHW)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp16_savedmodel_NCHW.tar.gz),
[(NCHW, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp16_savedmodel_NCHW_jpg.tar.gz),
[(NHWC)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp16_savedmodel_NHWC.tar.gz),
[(NHWC, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp16_savedmodel_NHWC_jpg.tar.gz)
### Transfer Learning
You can use a pretrained model to initialize a training process. In addition, you can freeze all but the final fully connected layer to fine-tune your model. Transfer learning is useful when training on your own small datasets. For a brief look at transfer learning in the context of convolutional neural networks, we recommend reading these [short notes](http://cs231n.github.io/transfer-learning/).
To fine-tune a pretrained ResNet, you must make three changes to your training procedure:
1) Build the exact same model as before, except with a different number of labels in the final classification layer.
2) Restore all weights from the pretrained ResNet except for the final classification layer, which is randomly initialized instead.
3) Freeze the earlier layers of the network.
We can perform these three operations by specifying two flags: `--pretrained_model_checkpoint_path` and `--fine_tune`. The first flag is a string that points to the path of a pretrained ResNet checkpoint. If this flag is specified, all weights except those of the final classification layer will be loaded from it. An important caveat: if both `--pretrained_model_checkpoint_path` and a non-empty `model_dir` are passed, the TensorFlow Estimator will load only from `model_dir`. For more on this, please see [WarmStartSettings](https://www.tensorflow.org/versions/master/api_docs/python/tf/estimator/WarmStartSettings) and [Estimators](https://www.tensorflow.org/guide/estimators).
The second flag, `--fine_tune`, is a boolean that indicates whether the earlier layers of the network should be frozen. Set it to false to continue training a pretrained model from a checkpoint; set it to true to freeze the earlier layers and train a new classification layer from scratch.
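As a sketch of the warm-start mechanics behind these flags (assumptions: `my_model_fn` stands in for your model function, and the `'^(?!.*dense)'` regex matches every variable except the final dense layer; verify the variable names in your checkpoint with `tf.train.list_variables`):

```python
import tensorflow as tf  # TensorFlow 1.x

def my_model_fn(features, labels, mode, params):
  # Stand-in for a real model_fn such as resnet_run_loop.resnet_model_fn.
  raise NotImplementedError

# Warm-start every variable except the final dense (classification) layer.
warm_start = tf.estimator.WarmStartSettings(
    ckpt_to_initialize_from='/path/to/pretrained/checkpoint',
    vars_to_warm_start='^(?!.*dense)')

estimator = tf.estimator.Estimator(
    model_fn=my_model_fn,
    model_dir='/tmp/finetune_model',  # should be fresh, per the caveat above
    warm_start_from=warm_start)
```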
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Downloads and extracts the binary version of the CIFAR-10 dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import sys
import tarfile
from six.moves import urllib
import tensorflow as tf
DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
parser = argparse.ArgumentParser()
parser.add_argument(
'--data_dir', type=str, default='/tmp/cifar10_data',
help='Directory to download data and extract the tarball')
def main(_):
"""Download and extract the tarball from Alex's website."""
if not os.path.exists(FLAGS.data_dir):
os.makedirs(FLAGS.data_dir)
filename = DATA_URL.split('/')[-1]
filepath = os.path.join(FLAGS.data_dir, filename)
if not os.path.exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (
filename, 100.0 * count * block_size / total_size))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
tarfile.open(filepath, 'r:gz').extractall(FLAGS.data_dir)
if __name__ == '__main__':
FLAGS, unparsed = parser.parse_known_args()
tf.compat.v1.app.run(argv=[sys.argv[0]] + unparsed)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the CIFAR-10 dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app as absl_app
from absl import flags
from absl import logging
from six.moves import range
import tensorflow as tf
from official.r1.resnet import resnet_model
from official.r1.resnet import resnet_run_loop
from official.r1.utils.logs import logger
from official.utils.flags import core as flags_core
HEIGHT = 32
WIDTH = 32
NUM_CHANNELS = 3
_DEFAULT_IMAGE_BYTES = HEIGHT * WIDTH * NUM_CHANNELS
# The record is the image plus a one-byte label
_RECORD_BYTES = _DEFAULT_IMAGE_BYTES + 1
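# Each record is therefore 1 + 3 * 32 * 32 = 3073 bytes: a label byte followed
# by the red, green, and blue channel planes in row-major order.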
NUM_CLASSES = 10
_NUM_DATA_FILES = 5
# TODO(tobyboyd): Change to best practice 45K(train)/5K(val)/10K(test) splits.
NUM_IMAGES = {
'train': 50000,
'validation': 10000,
}
DATASET_NAME = 'CIFAR-10'
###############################################################################
# Data processing
###############################################################################
def get_filenames(is_training, data_dir):
"""Returns a list of filenames."""
assert tf.io.gfile.exists(data_dir), (
'Run cifar10_download_and_extract.py first to download and extract the '
'CIFAR-10 data.')
if is_training:
return [
os.path.join(data_dir, 'data_batch_%d.bin' % i)
for i in range(1, _NUM_DATA_FILES + 1)
]
else:
return [os.path.join(data_dir, 'test_batch.bin')]
def parse_record(raw_record, is_training, dtype):
"""Parse CIFAR-10 image and label from a raw record."""
# Convert bytes to a vector of uint8 that is record_bytes long.
record_vector = tf.io.decode_raw(raw_record, tf.uint8)
# The first byte represents the label, which we convert from uint8 to int32
# and then to one-hot.
label = tf.cast(record_vector[0], tf.int32)
# The remaining bytes after the label represent the image, which we reshape
# from [depth * height * width] to [depth, height, width].
depth_major = tf.reshape(record_vector[1:_RECORD_BYTES],
[NUM_CHANNELS, HEIGHT, WIDTH])
# Convert from [depth, height, width] to [height, width, depth], and cast as
# float32.
image = tf.cast(tf.transpose(a=depth_major, perm=[1, 2, 0]), tf.float32)
image = preprocess_image(image, is_training)
image = tf.cast(image, dtype)
return image, label
def preprocess_image(image, is_training):
"""Preprocess a single image of layout [height, width, depth]."""
if is_training:
# Resize the image to add four extra pixels on each side.
image = tf.image.resize_with_crop_or_pad(
image, HEIGHT + 8, WIDTH + 8)
# Randomly crop a [HEIGHT, WIDTH] section of the image.
image = tf.image.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])
# Randomly flip the image horizontally.
image = tf.image.random_flip_left_right(image)
# Subtract off the mean and divide by the variance of the pixels.
image = tf.image.per_image_standardization(image)
return image
def input_fn(is_training,
data_dir,
batch_size,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
input_context=None,
drop_remainder=False):
"""Input function which provides batches for train or eval.
Args:
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
input_context: A `tf.distribute.InputContext` object passed in by
`tf.distribute.Strategy`.
drop_remainder: A boolean indicates whether to drop the remainder of the
batches. If True, the batch dimension will be static.
Returns:
A dataset that can be used for iteration.
"""
filenames = get_filenames(is_training, data_dir)
dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)
if input_context:
logging.info(
'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
input_context.input_pipeline_id, input_context.num_input_pipelines)
dataset = dataset.shard(input_context.num_input_pipelines,
input_context.input_pipeline_id)
return resnet_run_loop.process_record_dataset(
dataset=dataset,
is_training=is_training,
batch_size=batch_size,
shuffle_buffer=NUM_IMAGES['train'],
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder
)
def get_synth_input_fn(dtype):
return resnet_run_loop.get_synth_input_fn(
HEIGHT, WIDTH, NUM_CHANNELS, NUM_CLASSES, dtype=dtype)
###############################################################################
# Running the model
###############################################################################
class Cifar10Model(resnet_model.Model):
"""Model class with appropriate defaults for CIFAR-10 data."""
def __init__(self, resnet_size, data_format=None, num_classes=NUM_CLASSES,
resnet_version=resnet_model.DEFAULT_VERSION,
dtype=resnet_model.DEFAULT_DTYPE):
"""These are the parameters that work for CIFAR-10 data.
Args:
resnet_size: The number of convolutional layers needed in the model.
data_format: Either 'channels_first' or 'channels_last', specifying which
data format to use when setting up the model.
num_classes: The number of output classes needed from the model. This
enables users to extend the same model to their own datasets.
resnet_version: Integer representing which version of the ResNet network
to use. See README for details. Valid values: [1, 2]
dtype: The TensorFlow dtype to use for calculations.
Raises:
ValueError: if invalid resnet_size is chosen
"""
if resnet_size % 6 != 2:
raise ValueError('resnet_size must be 6n + 2:', resnet_size)
num_blocks = (resnet_size - 2) // 6
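# Worked example: resnet_size=56 gives num_blocks = (56 - 2) // 6 = 9, i.e.
# three groups of nine two-conv blocks, plus the initial conv and the final
# dense layer: 6*9 + 2 = 56 layers.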
super(Cifar10Model, self).__init__(
resnet_size=resnet_size,
bottleneck=False,
num_classes=num_classes,
num_filters=16,
kernel_size=3,
conv_stride=1,
first_pool_size=None,
first_pool_stride=None,
block_sizes=[num_blocks] * 3,
block_strides=[1, 2, 2],
resnet_version=resnet_version,
data_format=data_format,
dtype=dtype
)
def cifar10_model_fn(features, labels, mode, params):
"""Model function for CIFAR-10."""
features = tf.reshape(features, [-1, HEIGHT, WIDTH, NUM_CHANNELS])
# Learning rate schedule follows arXiv:1512.03385 for ResNet-56 and under.
learning_rate_fn = resnet_run_loop.learning_rate_with_decay(
batch_size=params['batch_size'] * params.get('num_workers', 1),
batch_denom=128, num_images=NUM_IMAGES['train'],
boundary_epochs=[91, 136, 182], decay_rates=[1, 0.1, 0.01, 0.001])
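# Rough reading of the schedule (assuming the helper scales a base rate of 0.1
# by batch_size/batch_denom): with batch_size=128 the learning rate starts at
# 0.1 and drops by 10x at each of epochs 91, 136, and 182.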
# Weight decay of 2e-4 diverges from 1e-4 decay used in the ResNet paper
# and seems more stable in testing. The difference was nominal for ResNet-56.
weight_decay = 2e-4
# Empirical testing showed that including batch_normalization variables
# in the calculation of regularized loss helped validation accuracy
# for the CIFAR-10 dataset, perhaps because the regularization prevents
# overfitting on the small data set. We therefore include all vars when
# regularizing and computing loss during training.
def loss_filter_fn(_):
return True
return resnet_run_loop.resnet_model_fn(
features=features,
labels=labels,
mode=mode,
model_class=Cifar10Model,
resnet_size=params['resnet_size'],
weight_decay=weight_decay,
learning_rate_fn=learning_rate_fn,
momentum=0.9,
data_format=params['data_format'],
resnet_version=params['resnet_version'],
loss_scale=params['loss_scale'],
loss_filter_fn=loss_filter_fn,
dtype=params['dtype'],
fine_tune=params['fine_tune']
)
def define_cifar_flags():
resnet_run_loop.define_resnet_flags()
flags.adopt_module_key_flags(resnet_run_loop)
flags_core.set_defaults(data_dir='/tmp/cifar10_data/cifar-10-batches-bin',
model_dir='/tmp/cifar10_model',
resnet_size='56',
train_epochs=182,
epochs_between_evals=10,
batch_size=128,
image_bytes_as_serving_input=False)
def run_cifar(flags_obj):
"""Run ResNet CIFAR-10 training and eval loop.
Args:
flags_obj: An object containing parsed flag values.
Returns:
Dictionary of results. Including final accuracy.
"""
if flags_obj.image_bytes_as_serving_input:
logging.fatal(
'--image_bytes_as_serving_input cannot be set to True for CIFAR. '
'This flag is only applicable to ImageNet.')
return
input_function = (flags_obj.use_synthetic_data and
get_synth_input_fn(flags_core.get_tf_dtype(flags_obj)) or
input_fn)
result = resnet_run_loop.resnet_main(
flags_obj, cifar10_model_fn, input_function, DATASET_NAME,
shape=[HEIGHT, WIDTH, NUM_CHANNELS])
return result
def main(_):
with logger.benchmark_context(flags.FLAGS):
run_cifar(flags.FLAGS)
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
define_cifar_flags()
absl_app.run(main)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tempfile import mkstemp
from absl import logging
import numpy as np
import tensorflow as tf
from official.r1.resnet import cifar10_main
from official.utils.testing import integration
logging.set_verbosity(logging.ERROR)
_BATCH_SIZE = 128
_HEIGHT = 32
_WIDTH = 32
_NUM_CHANNELS = 3
class BaseTest(tf.test.TestCase):
"""Tests for the Cifar10 version of Resnet.
"""
_num_validation_images = None
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(BaseTest, cls).setUpClass()
tf.compat.v1.disable_eager_execution()
cifar10_main.define_cifar_flags()
def setUp(self):
super(BaseTest, self).setUp()
self._num_validation_images = cifar10_main.NUM_IMAGES['validation']
cifar10_main.NUM_IMAGES['validation'] = 4
def tearDown(self):
super(BaseTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
cifar10_main.NUM_IMAGES['validation'] = self._num_validation_images
def test_dataset_input_fn(self):
fake_data = bytearray()
fake_data.append(7)
for i in range(_NUM_CHANNELS):
for _ in range(_HEIGHT * _WIDTH):
fake_data.append(i)
_, filename = mkstemp(dir=self.get_temp_dir())
data_file = open(filename, 'wb')
data_file.write(fake_data)
data_file.close()
fake_dataset = tf.data.FixedLengthRecordDataset(
filename, cifar10_main._RECORD_BYTES) # pylint: disable=protected-access
fake_dataset = fake_dataset.map(
lambda val: cifar10_main.parse_record(val, False, tf.float32))
image, label = tf.compat.v1.data.make_one_shot_iterator(
fake_dataset).get_next()
self.assertAllEqual(label.shape, ())
self.assertAllEqual(image.shape, (_HEIGHT, _WIDTH, _NUM_CHANNELS))
with self.session() as sess:
image, label = sess.run([image, label])
self.assertEqual(label, 7)
for row in image:
for pixel in row:
self.assertAllClose(pixel, np.array([-1.225, 0., 1.225]), rtol=1e-3)
def cifar10_model_fn_helper(self, mode, resnet_version, dtype):
input_fn = cifar10_main.get_synth_input_fn(dtype)
dataset = input_fn(True, '', _BATCH_SIZE)
iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
features, labels = iterator.get_next()
spec = cifar10_main.cifar10_model_fn(
features, labels, mode, {
'dtype': dtype,
'resnet_size': 32,
'data_format': 'channels_last',
'batch_size': _BATCH_SIZE,
'resnet_version': resnet_version,
'loss_scale': 128 if dtype == tf.float16 else 1,
'fine_tune': False,
})
predictions = spec.predictions
self.assertAllEqual(predictions['probabilities'].shape,
(_BATCH_SIZE, 10))
self.assertEqual(predictions['probabilities'].dtype, tf.float32)
self.assertAllEqual(predictions['classes'].shape, (_BATCH_SIZE,))
self.assertEqual(predictions['classes'].dtype, tf.int64)
if mode != tf.estimator.ModeKeys.PREDICT:
loss = spec.loss
self.assertAllEqual(loss.shape, ())
self.assertEqual(loss.dtype, tf.float32)
if mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = spec.eval_metric_ops
self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
def test_cifar10_model_fn_train_mode_v1(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=1,
dtype=tf.float32)
def test_cifar10_model_fn_train_mode_v2(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=2,
dtype=tf.float32)
def test_cifar10_model_fn_eval_mode_v1(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=1,
dtype=tf.float32)
def test_cifar10_model_fn_eval_mode_v2(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=2,
dtype=tf.float32)
def test_cifar10_model_fn_predict_mode_v1(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.PREDICT,
resnet_version=1, dtype=tf.float32)
def test_cifar10_model_fn_predict_mode_v2(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.PREDICT,
resnet_version=2, dtype=tf.float32)
def _test_cifar10model_shape(self, resnet_version):
batch_size = 135
num_classes = 246
model = cifar10_main.Cifar10Model(32, data_format='channels_last',
num_classes=num_classes,
resnet_version=resnet_version)
fake_input = tf.random.uniform([batch_size, _HEIGHT, _WIDTH, _NUM_CHANNELS])
output = model(fake_input, training=True)
self.assertAllEqual(output.shape, (batch_size, num_classes))
def test_cifar10model_shape_v1(self):
self._test_cifar10model_shape(resnet_version=1)
def test_cifar10model_shape_v2(self):
self._test_cifar10model_shape(resnet_version=2)
def test_cifar10_end_to_end_synthetic_v1(self):
integration.run_synthetic(
main=cifar10_main.run_cifar, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-batch_size', '4',
'--max_train_steps', '1']
)
def test_cifar10_end_to_end_synthetic_v2(self):
integration.run_synthetic(
main=cifar10_main.run_cifar, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-batch_size', '4',
'--max_train_steps', '1']
)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Estimator benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
from absl import flags
from absl import logging
from absl.testing import flagsaver
import tensorflow as tf
from official.r1.resnet import cifar10_main as cifar_main
from official.r1.resnet import imagenet_main
from official.r1.utils.logs import hooks
from official.utils.flags import core as flags_core
IMAGENET_DATA_DIR_NAME = 'imagenet'
CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin'
FLAGS = flags.FLAGS
class EstimatorBenchmark(tf.test.Benchmark):
"""Base class to hold methods common to test classes in the module.
Code under test for Estimator models (ResNet50 and ResNet56) reports mostly
the same data and requires the same FLAG setup.
"""
local_flags = None
def __init__(self, output_dir=None, default_flags=None, flag_methods=None):
if not output_dir:
output_dir = '/tmp'
self.output_dir = output_dir
self.default_flags = default_flags or {}
self.flag_methods = flag_methods or {}
def _get_model_dir(self, folder_name):
"""Returns directory to store info, e.g. saved model and event log."""
return os.path.join(self.output_dir, folder_name)
def _setup(self):
"""Sets up and resets flags before each test."""
logging.set_verbosity(logging.INFO)
if EstimatorBenchmark.local_flags is None:
for flag_method in self.flag_methods:
flag_method()
# Parse a dummy argv to load flag defaults, which are then overridden below. The list cannot be empty.
flags.FLAGS(['foo'])
# Overrides flag values with defaults for the class of tests.
for k, v in self.default_flags.items():
setattr(FLAGS, k, v)
saved_flag_values = flagsaver.save_flag_values()
EstimatorBenchmark.local_flags = saved_flag_values
else:
flagsaver.restore_flag_values(EstimatorBenchmark.local_flags)
def _report_benchmark(self,
stats,
wall_time_sec,
top_1_max=None,
top_1_min=None):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from estimator models with known entries.
wall_time_sec: the duration of the benchmark execution in seconds.
top_1_max: highest passing level for top_1 accuracy.
top_1_min: lowest passing level for top_1 accuracy.
"""
examples_per_sec_hook = None
for hook in stats['train_hooks']:
if isinstance(hook, hooks.ExamplesPerSecondHook):
examples_per_sec_hook = hook
break
eval_results = stats['eval_results']
metrics = []
if 'accuracy' in eval_results:
metrics.append({'name': 'accuracy_top_1',
'value': float(eval_results['accuracy']),
'min_value': top_1_min,
'max_value': top_1_max})
if 'accuracy_top_5' in eval_results:
metrics.append({'name': 'accuracy_top_5',
'value': float(eval_results['accuracy_top_5'])})
if examples_per_sec_hook:
exp_per_second_list = examples_per_sec_hook.current_examples_per_sec_list
# ExamplesPerSecondHook skips the first 10 steps.
exp_per_sec = sum(exp_per_second_list) / (len(exp_per_second_list))
metrics.append({'name': 'exp_per_second',
'value': exp_per_sec})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=eval_results.get('global_step', None),
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class Resnet50EstimatorAccuracy(EstimatorBenchmark):
"""Benchmark accuracy tests for ResNet50 w/ Estimator."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""Benchmark accuracy tests for ResNet50 w/ Estimator.
Args:
output_dir: directory where output such as log files should be written.
root_data_dir: directory under which to look for the dataset.
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
flag_methods = [imagenet_main.define_imagenet_flags]
self.data_dir = os.path.join(root_data_dir, IMAGENET_DATA_DIR_NAME)
super(Resnet50EstimatorAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def benchmark_graph_8_gpu(self):
"""Test 8 GPUs graph mode."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
FLAGS.dtype = 'fp32'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu(self):
"""Test FP16 8 GPUs graph mode."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_8_gpu')
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_graph_rewrite_8_gpu(self):
"""Test FP16 graph rewrite 8 GPUs graph mode."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_fp16_graph_rewrite_8_gpu')
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = imagenet_main.run_imagenet(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
self._report_benchmark(stats,
wall_time_sec,
top_1_min=0.762,
top_1_max=0.766)
class Resnet50EstimatorBenchmarkBase(EstimatorBenchmark):
"""Base class for benchmarks for ResNet50 using Estimator."""
local_flags = None
def __init__(self, output_dir=None, default_flags=None):
flag_methods = [imagenet_main.define_imagenet_flags]
super(Resnet50EstimatorBenchmarkBase, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
flag_methods=flag_methods)
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = imagenet_main.run_imagenet(FLAGS)
wall_time_sec = time.time() - start_time_sec
print(stats)
# Remove values to skip triggering accuracy check.
stats['eval_results'].pop('accuracy', None)
stats['eval_results'].pop('accuracy_top_5', None)
self._report_benchmark(stats, wall_time_sec)
class Resnet50EstimatorBenchmark(Resnet50EstimatorBenchmarkBase):
"""Benchmarks for ResNet50 using Estimator with 1 worker."""
def __init__(self, output_dir=None, default_flags=None):
super(Resnet50EstimatorBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags)
def benchmark_graph_fp16_1_gpu(self):
"""Benchmarks graph fp16 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_1_gpu')
FLAGS.batch_size = 128
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_1_gpu_tweaked(self):
"""Benchmarks graph fp16 1 gpu tweaked."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_1_gpu_tweaked')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_graph_rewrite_1_gpu_tweaked(self):
"""Benchmarks graph fp16 graph rewrite 1 gpu tweaked."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_fp16_graph_rewrite_1_gpu_tweaked')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_1_gpu(self):
"""Benchmarks graph 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
FLAGS.batch_size = 128
FLAGS.dtype = 'fp32'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_8_gpu(self):
"""Benchmarks graph 8 gpus."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
FLAGS.batch_size = 128*8
FLAGS.dtype = 'fp32'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu(self):
"""Benchmarks graph fp16 8 gpus."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_8_gpu')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu_tweaked(self):
"""Benchmarks graph fp16 8 gpus tweaked."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_8_gpu_tweaked')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_graph_rewrite_8_gpu_tweaked(self):
"""Benchmarks graph fp16 graph rewrite 8 gpus tweaked."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_fp16_graph_rewrite_8_gpu_tweaked')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
class Resnet50EstimatorBenchmarkSynth(Resnet50EstimatorBenchmark):
"""Resnet50 synthetic benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['use_synthetic_data'] = True
def_flags['max_train_steps'] = 110
def_flags['train_epochs'] = 1
super(Resnet50EstimatorBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet50EstimatorBenchmarkReal(Resnet50EstimatorBenchmark):
"""Resnet50 real data benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['data_dir'] = os.path.join(root_data_dir, IMAGENET_DATA_DIR_NAME)
def_flags['max_train_steps'] = 110
def_flags['train_epochs'] = 1
super(Resnet50EstimatorBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet50MultiWorkerEstimatorBenchmark(Resnet50EstimatorBenchmarkBase):
"""Benchmarks for ResNet50 using Estimator with multiple workers."""
def __init__(self, output_dir=None, default_flags=None):
super(Resnet50MultiWorkerEstimatorBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags)
def benchmark_graph_fp16_8_gpu_ring_tweaked(self):
"""Benchmarks graph fp16 8 gpus with ring collective tweaked."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'multi_worker_mirrored'
FLAGS.all_reduce_alg = 'ring'
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.datasets_num_private_threads = 32
FLAGS.model_dir = self._get_model_dir(
folder_name='benchmark_graph_fp16_8_gpu_ring_tweaked')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu_nccl_tweaked(self):
"""Benchmarks graph fp16 8 gpus with nccl collective tweaked."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'multi_worker_mirrored'
FLAGS.all_reduce_alg = 'nccl'
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.datasets_num_private_threads = 32
FLAGS.model_dir = self._get_model_dir(
folder_name='benchmark_graph_fp16_8_gpu_nccl_tweaked')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
class Resnet50MultiWorkerEstimatorBenchmarkSynth(
Resnet50MultiWorkerEstimatorBenchmark):
"""ResNet50, multi-worker, Estimator, synthetic data."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['use_synthetic_data'] = True
def_flags['max_train_steps'] = 110
def_flags['train_epochs'] = 1
super(Resnet50MultiWorkerEstimatorBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet56EstimatorAccuracy(EstimatorBenchmark):
"""Accuracy tests for Estimator ResNet56."""
local_flags = None
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""A benchmark class.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
flag_methods = [cifar_main.define_cifar_flags]
self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)
super(Resnet56EstimatorAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def benchmark_graph_1_gpu(self):
"""Test layers model with Estimator and distribution strategies."""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp32'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_1_gpu(self):
"""Test layers FP16 model with Estimator and distribution strategies."""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_1_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp16'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_2_gpu(self):
"""Test layers model with Estimator and dist_strat. 2 GPUs."""
self._setup()
flags.FLAGS.num_gpus = 2
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp32'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_2_gpu(self):
"""Test layers FP16 model with Estimator and dist_strat. 2 GPUs."""
self._setup()
flags.FLAGS.num_gpus = 2
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_2_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp16'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def unit_test(self):
"""A lightweight test that can finish quickly."""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 1
flags.FLAGS.model_dir = self._get_model_dir('unit_test')
flags.FLAGS.resnet_size = 8
flags.FLAGS.dtype = 'fp32'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
"""Executes benchmark and reports result."""
start_time_sec = time.time()
stats = cifar_main.run_cifar(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
self._report_benchmark(stats,
wall_time_sec,
top_1_min=0.926,
top_1_max=0.938)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app as absl_app
from absl import flags
from absl import logging
from six.moves import range
import tensorflow as tf
from official.r1.resnet import imagenet_preprocessing
from official.r1.resnet import resnet_model
from official.r1.resnet import resnet_run_loop
from official.r1.utils.logs import logger
from official.utils.flags import core as flags_core
DEFAULT_IMAGE_SIZE = 224
NUM_CHANNELS = 3
NUM_CLASSES = 1001
NUM_IMAGES = {
'train': 1281167,
'validation': 50000,
}
_NUM_TRAIN_FILES = 1024
_SHUFFLE_BUFFER = 10000
DATASET_NAME = 'ImageNet'
###############################################################################
# Data processing
###############################################################################
def get_filenames(is_training, data_dir):
"""Return filenames for dataset."""
if is_training:
return [
os.path.join(data_dir, 'train-%05d-of-01024' % i)
for i in range(_NUM_TRAIN_FILES)]
else:
return [
os.path.join(data_dir, 'validation-%05d-of-00128' % i)
for i in range(128)]
def _parse_example_proto(example_serialized):
"""Parses an Example proto containing a training example of an image.
The output of the build_image_data.py image preprocessing script is a dataset
containing serialized Example protocol buffers. Each Example proto contains
the following fields (values are included as examples):
image/height: 462
image/width: 581
image/colorspace: 'RGB'
image/channels: 3
image/class/label: 615
image/class/synset: 'n03623198'
image/class/text: 'knee pad'
image/object/bbox/xmin: 0.1
image/object/bbox/xmax: 0.9
image/object/bbox/ymin: 0.2
image/object/bbox/ymax: 0.6
image/object/bbox/label: 615
image/format: 'JPEG'
image/filename: 'ILSVRC2012_val_00041207.JPEG'
image/encoded: <JPEG encoded string>
Args:
example_serialized: scalar Tensor tf.string containing a serialized
Example protocol buffer.
Returns:
image_buffer: Tensor tf.string containing the contents of a JPEG file.
label: Tensor tf.int32 containing the label.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
"""
# Dense features in Example proto.
feature_map = {
'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string,
default_value=''),
'image/class/label': tf.io.FixedLenFeature([], dtype=tf.int64,
default_value=-1),
'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string,
default_value=''),
}
sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
# Sparse features in Example proto.
feature_map.update(
{k: sparse_float32 for k in ['image/object/bbox/xmin',
'image/object/bbox/ymin',
'image/object/bbox/xmax',
'image/object/bbox/ymax']})
features = tf.io.parse_single_example(serialized=example_serialized,
features=feature_map)
label = tf.cast(features['image/class/label'], dtype=tf.int32)
xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
  # Note that the coordinates are ordered [ymin, xmin, ymax, xmax] to match
  # the convention expected by tf.image ops such as
  # sample_distorted_bounding_box.
bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
# Force the variable number of bounding boxes into the shape
# [1, num_boxes, coords].
bbox = tf.expand_dims(bbox, 0)
bbox = tf.transpose(a=bbox, perm=[0, 2, 1])
return features['image/encoded'], label, bbox
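# Illustrative sketch (hypothetical helper, not used by the pipeline;
# assumes the field layout documented above): builds a synthetic serialized
# Example with the fields `_parse_example_proto` expects and parses it back.
# The JPEG bytes and label values are placeholders, not real data.
def _example_parse_proto_demo():
  example = tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[b'fake-jpeg-bytes'])),
      'image/class/label': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[615])),
      'image/object/bbox/ymin': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.2])),
      'image/object/bbox/xmin': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.1])),
      'image/object/bbox/ymax': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.6])),
      'image/object/bbox/xmax': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.9])),
  }))
  image_buffer, label, bbox = _parse_example_proto(
      tf.constant(example.SerializeToString()))
  # bbox comes back with shape [1, num_boxes, 4], coordinates ordered
  # [ymin, xmin, ymax, xmax].
  return image_buffer, label, bbox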
def parse_record(raw_record, is_training, dtype):
"""Parses a record containing a training example of an image.
The input record is parsed into a label and image, and the image is passed
through preprocessing steps (cropping, flipping, and so on).
Args:
raw_record: scalar Tensor tf.string containing a serialized
Example protocol buffer.
is_training: A boolean denoting whether the input is for training.
dtype: data type to use for images/features.
Returns:
Tuple with processed image tensor and one-hot-encoded label tensor.
"""
image_buffer, label, bbox = _parse_example_proto(raw_record)
image = imagenet_preprocessing.preprocess_image(
image_buffer=image_buffer,
bbox=bbox,
output_height=DEFAULT_IMAGE_SIZE,
output_width=DEFAULT_IMAGE_SIZE,
num_channels=NUM_CHANNELS,
is_training=is_training)
image = tf.cast(image, dtype)
return image, label
def input_fn(is_training,
data_dir,
batch_size,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
input_context=None,
drop_remainder=False,
tf_data_experimental_slack=False):
"""Input function which provides batches for train or eval.
Args:
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
input_context: A `tf.distribute.InputContext` object passed in by
`tf.distribute.Strategy`.
drop_remainder: A boolean indicates whether to drop the remainder of the
batches. If True, the batch dimension will be static.
tf_data_experimental_slack: Whether to enable tf.data's
`experimental_slack` option.
Returns:
A dataset that can be used for iteration.
"""
filenames = get_filenames(is_training, data_dir)
dataset = tf.data.Dataset.from_tensor_slices(filenames)
if input_context:
logging.info(
'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
input_context.input_pipeline_id, input_context.num_input_pipelines)
dataset = dataset.shard(input_context.num_input_pipelines,
input_context.input_pipeline_id)
if is_training:
# Shuffle the input files
dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)
# Convert to individual records.
# cycle_length = 10 means that up to 10 files will be read and deserialized in
# parallel. You may want to increase this number if you have a large number of
# CPU cores.
dataset = dataset.interleave(
tf.data.TFRecordDataset,
cycle_length=10,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
return resnet_run_loop.process_record_dataset(
dataset=dataset,
is_training=is_training,
batch_size=batch_size,
shuffle_buffer=_SHUFFLE_BUFFER,
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder,
tf_data_experimental_slack=tf_data_experimental_slack,
)
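# Illustrative sketch (hypothetical helper, not used by the pipeline): the
# sharding branch above gives each input pipeline every k-th file. With an
# assumed two-pipeline setup, pipeline 0 sees files 0, 2, 4, ...
def _example_shard_demo():
  files = tf.data.Dataset.from_tensor_slices(
      ['train-%05d-of-01024' % i for i in range(6)])
  pipeline_0 = files.shard(num_shards=2, index=0)
  return [name.numpy() for name in pipeline_0]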
def get_synth_input_fn(dtype):
return resnet_run_loop.get_synth_input_fn(
DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS, NUM_CLASSES,
dtype=dtype)
###############################################################################
# Running the model
###############################################################################
class ImagenetModel(resnet_model.Model):
"""Model class with appropriate defaults for Imagenet data."""
def __init__(self, resnet_size, data_format=None, num_classes=NUM_CLASSES,
resnet_version=resnet_model.DEFAULT_VERSION,
dtype=resnet_model.DEFAULT_DTYPE):
"""These are the parameters that work for Imagenet data.
Args:
resnet_size: The number of convolutional layers needed in the model.
data_format: Either 'channels_first' or 'channels_last', specifying which
data format to use when setting up the model.
num_classes: The number of output classes needed from the model. This
enables users to extend the same model to their own datasets.
resnet_version: Integer representing which version of the ResNet network
to use. See README for details. Valid values: [1, 2]
dtype: The TensorFlow dtype to use for calculations.
"""
# For bigger models, we want to use "bottleneck" layers
    bottleneck = resnet_size >= 50
super(ImagenetModel, self).__init__(
resnet_size=resnet_size,
bottleneck=bottleneck,
num_classes=num_classes,
num_filters=64,
kernel_size=7,
conv_stride=2,
first_pool_size=3,
first_pool_stride=2,
block_sizes=_get_block_sizes(resnet_size),
block_strides=[1, 2, 2, 2],
resnet_version=resnet_version,
data_format=data_format,
dtype=dtype
)
def _get_block_sizes(resnet_size):
"""Retrieve the size of each block_layer in the ResNet model.
The number of block layers used for the Resnet model varies according
to the size of the model. This helper grabs the layer set we want, throwing
an error if a non-standard size has been selected.
Args:
resnet_size: The number of convolutional layers needed in the model.
Returns:
A list of block sizes to use in building the model.
Raises:
    ValueError: if invalid resnet_size is received.
"""
choices = {
18: [2, 2, 2, 2],
34: [3, 4, 6, 3],
50: [3, 4, 6, 3],
101: [3, 4, 23, 3],
152: [3, 8, 36, 3],
200: [3, 24, 36, 3]
}
try:
return choices[resnet_size]
except KeyError:
err = ('Could not find layers for selected Resnet size.\n'
'Size received: {}; sizes allowed: {}.'.format(
resnet_size, list(choices.keys())))
raise ValueError(err)
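# Illustrative sketch (hypothetical helper, not used by the model): the
# lookup for ResNet-50 yields [3, 4, 6, 3] blocks per stage, and an
# unsupported size surfaces as a ValueError.
def _example_block_sizes_demo():
  assert _get_block_sizes(50) == [3, 4, 6, 3]
  try:
    _get_block_sizes(42)
  except ValueError as err:
    return str(err)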
def imagenet_model_fn(features, labels, mode, params):
"""Our model_fn for ResNet to be used with our Estimator."""
# Warmup and higher lr may not be valid for fine tuning with small batches
# and smaller numbers of training images.
if params['fine_tune']:
warmup = False
base_lr = .1
else:
warmup = True
base_lr = .128
learning_rate_fn = resnet_run_loop.learning_rate_with_decay(
batch_size=params['batch_size'] * params.get('num_workers', 1),
batch_denom=256, num_images=NUM_IMAGES['train'],
boundary_epochs=[30, 60, 80, 90], decay_rates=[1, 0.1, 0.01, 0.001, 1e-4],
warmup=warmup, base_lr=base_lr)
return resnet_run_loop.resnet_model_fn(
features=features,
labels=labels,
mode=mode,
model_class=ImagenetModel,
resnet_size=params['resnet_size'],
weight_decay=flags.FLAGS.weight_decay,
learning_rate_fn=learning_rate_fn,
momentum=0.9,
data_format=params['data_format'],
resnet_version=params['resnet_version'],
loss_scale=params['loss_scale'],
loss_filter_fn=None,
dtype=params['dtype'],
fine_tune=params['fine_tune'],
label_smoothing=flags.FLAGS.label_smoothing
)
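# Illustrative sketch (hypothetical helper, not the actual schedule object):
# approximates the piecewise-constant schedule configured above, assuming
# `learning_rate_with_decay` scales base_lr by batch_size / batch_denom and
# applies each decay rate at the matching boundary epoch.
def _example_lr_schedule(global_step, batch_size, base_lr=0.128):
  steps_per_epoch = NUM_IMAGES['train'] // batch_size
  boundaries = [epoch * steps_per_epoch for epoch in [30, 60, 80, 90]]
  initial_lr = base_lr * batch_size / 256.0
  values = [initial_lr * rate for rate in [1, 0.1, 0.01, 0.001, 1e-4]]
  for boundary, value in zip(boundaries, values):
    if global_step < boundary:
      return value
  return values[-1]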
def define_imagenet_flags():
resnet_run_loop.define_resnet_flags(
resnet_size_choices=['18', '34', '50', '101', '152', '200'],
dynamic_loss_scale=True,
fp16_implementation=True)
flags.adopt_module_key_flags(resnet_run_loop)
flags_core.set_defaults(train_epochs=90)
def run_imagenet(flags_obj):
"""Run ResNet ImageNet training and eval loop.
Args:
flags_obj: An object containing parsed flag values.
Returns:
Dict of results of the run. Contains the keys `eval_results` and
`train_hooks`. `eval_results` contains accuracy (top_1) and
    accuracy_top_5. `train_hooks` is a list of the hook instances used during
    training.
"""
  input_function = (get_synth_input_fn(flags_core.get_tf_dtype(flags_obj))
                    if flags_obj.use_synthetic_data else input_fn)
result = resnet_run_loop.resnet_main(
flags_obj, imagenet_model_fn, input_function, DATASET_NAME,
shape=[DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS])
return result
def main(_):
with logger.benchmark_context(flags.FLAGS):
run_imagenet(flags.FLAGS)
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
define_imagenet_flags()
absl_app.run(main)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images.
Training images are sampled using the provided bounding boxes, and subsequently
cropped to the sampled bounding box. Images are additionally flipped randomly,
then resized to the target output size (without aspect-ratio preservation).
Images used during evaluation are resized (with aspect-ratio preservation) and
centrally cropped.
All images undergo mean color subtraction.
Note that these steps are colloquially referred to as "ResNet preprocessing,"
and they differ from "VGG preprocessing," which does not use bounding boxes
and instead does an aspect-preserving resize followed by random crop during
training. (These both differ from "Inception preprocessing," which introduces
color distortion steps.)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
_CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN]
# The lower bound for the smallest side of the image for aspect-preserving
# resizing. For example, if an image is 500 x 1000, it will be resized to
# _RESIZE_MIN x (_RESIZE_MIN * 2).
_RESIZE_MIN = 256
def _decode_crop_and_flip(image_buffer, bbox, num_channels):
"""Crops the given image to a random part of the image, and randomly flips.
We use the fused decode_and_crop op, which performs better than the two ops
used separately in series, but note that this requires that the image be
passed in as an un-decoded string Tensor.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
num_channels: Integer depth of the image buffer for decoding.
Returns:
3-D tensor with cropped image.
"""
# A large fraction of image datasets contain a human-annotated bounding box
# delineating the region of the image containing the object of interest. We
# choose to create a new bounding box for the object which is a randomly
# distorted version of the human-annotated bounding box that obeys an
# allowed range of aspect ratios, sizes and overlap with the human-annotated
# bounding box. If no box is supplied, then we assume the bounding box is
# the entire image.
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
tf.image.extract_jpeg_shape(image_buffer),
bounding_boxes=bbox,
min_object_covered=0.1,
aspect_ratio_range=[0.75, 1.33],
area_range=[0.05, 1.0],
max_attempts=100,
use_image_if_no_bounding_boxes=True)
bbox_begin, bbox_size, _ = sample_distorted_bounding_box
# Reassemble the bounding box in the format the crop op requires.
offset_y, offset_x, _ = tf.unstack(bbox_begin)
target_height, target_width, _ = tf.unstack(bbox_size)
crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
# Use the fused decode and crop op here, which is faster than each in series.
cropped = tf.image.decode_and_crop_jpeg(
image_buffer, crop_window, channels=num_channels)
# Flip to add a little more random distortion in.
cropped = tf.image.random_flip_left_right(cropped)
return cropped
def _central_crop(image, crop_height, crop_width):
"""Performs central crops of the given image list.
Args:
image: a 3-D image tensor
crop_height: the height of the image following the crop.
crop_width: the width of the image following the crop.
Returns:
3-D tensor with cropped image.
"""
shape = tf.shape(input=image)
height, width = shape[0], shape[1]
amount_to_be_cropped_h = (height - crop_height)
crop_top = amount_to_be_cropped_h // 2
amount_to_be_cropped_w = (width - crop_width)
crop_left = amount_to_be_cropped_w // 2
return tf.slice(
image, [crop_top, crop_left, 0], [crop_height, crop_width, -1])
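# Illustrative sketch (hypothetical helper): cropping a 4 x 6 single-channel
# image to 2 x 2 keeps rows 1-2 and columns 2-3, i.e. the centre of the
# image (values 8, 9, 14, 15 for a tf.range fill).
def _example_central_crop_demo():
  image = tf.reshape(tf.range(4 * 6), [4, 6, 1])
  return _central_crop(image, 2, 2)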
def _mean_image_subtraction(image, means, num_channels):
"""Subtracts the given means from each image channel.
For example:
means = [123.68, 116.779, 103.939]
image = _mean_image_subtraction(image, means)
Note that the rank of `image` must be known.
Args:
image: a tensor of size [height, width, C].
means: a C-vector of values to subtract from each channel.
num_channels: number of color channels in the image that will be distorted.
Returns:
the centered image.
Raises:
ValueError: If the rank of `image` is unknown, if `image` has a rank other
than three or if the number of channels in `image` doesn't match the
number of values in `means`.
"""
if image.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
if len(means) != num_channels:
raise ValueError('len(means) must match the number of channels')
# We have a 1-D tensor of means; convert to 3-D.
# Note(b/130245863): we explicitly call `broadcast` instead of simply
# expanding dimensions for better performance.
means = tf.broadcast_to(means, tf.shape(image))
return image - means
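# Illustrative sketch (hypothetical helper): the broadcast expands the
# C-vector of means to [height, width, C], so the subtraction is applied per
# channel. A uniform 128-valued image comes back as 128 minus each channel
# mean.
def _example_mean_subtraction_demo():
  image = tf.ones([2, 2, 3]) * 128.0
  return _mean_image_subtraction(image, _CHANNEL_MEANS, 3)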
def _smallest_size_at_least(height, width, resize_min):
"""Computes new shape with the smallest side equal to `smallest_side`.
Computes new shape with the smallest side equal to `smallest_side` while
preserving the original aspect ratio.
Args:
height: an int32 scalar tensor indicating the current height.
width: an int32 scalar tensor indicating the current width.
resize_min: A python integer or scalar `Tensor` indicating the size of
the smallest side after resize.
Returns:
new_height: an int32 scalar tensor indicating the new height.
new_width: an int32 scalar tensor indicating the new width.
"""
resize_min = tf.cast(resize_min, tf.float32)
# Convert to floats to make subsequent calculations go smoothly.
height, width = tf.cast(height, tf.float32), tf.cast(width, tf.float32)
smaller_dim = tf.minimum(height, width)
scale_ratio = resize_min / smaller_dim
# Convert back to ints to make heights and widths that TF ops will accept.
new_height = tf.cast(height * scale_ratio, tf.int32)
new_width = tf.cast(width * scale_ratio, tf.int32)
return new_height, new_width
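# Illustrative sketch (hypothetical helper): for a 500 x 1000 image with
# resize_min = 256, scale_ratio = 256 / 500 = 0.512, giving a new shape of
# (256, 512) with the aspect ratio preserved.
def _example_smallest_size_demo():
  return _smallest_size_at_least(tf.constant(500), tf.constant(1000), 256)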
def _aspect_preserving_resize(image, resize_min):
"""Resize images preserving the original aspect ratio.
Args:
image: A 3-D image `Tensor`.
resize_min: A python integer or scalar `Tensor` indicating the size of
the smallest side after resize.
Returns:
resized_image: A 3-D tensor containing the resized image.
"""
shape = tf.shape(input=image)
height, width = shape[0], shape[1]
new_height, new_width = _smallest_size_at_least(height, width, resize_min)
return _resize_image(image, new_height, new_width)
def _resize_image(image, height, width):
"""Simple wrapper around tf.resize_images.
This is primarily to make sure we use the same `ResizeMethod` and other
details each time.
Args:
image: A 3-D image `Tensor`.
height: The target height for the resized image.
width: The target width for the resized image.
Returns:
resized_image: A 3-D tensor containing the resized image. The first two
dimensions have the shape [height, width].
"""
return tf.compat.v1.image.resize(
image, [height, width], method=tf.image.ResizeMethod.BILINEAR,
align_corners=False)
def preprocess_image(image_buffer, bbox, output_height, output_width,
num_channels, is_training=False):
"""Preprocesses the given image.
Preprocessing includes decoding, cropping, and resizing for both training
and eval images. Training preprocessing, however, introduces some random
distortion of the image to improve accuracy.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
num_channels: Integer depth of the image buffer for decoding.
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
Returns:
A preprocessed image.
"""
if is_training:
# For training, we want to randomize some of the distortions.
image = _decode_crop_and_flip(image_buffer, bbox, num_channels)
image = _resize_image(image, output_height, output_width)
else:
# For validation, we want to decode, resize, then just crop the middle.
image = tf.image.decode_jpeg(image_buffer, channels=num_channels)
image = _aspect_preserving_resize(image, _RESIZE_MIN)
image = _central_crop(image, output_height, output_width)
image.set_shape([output_height, output_width, num_channels])
return _mean_image_subtraction(image, _CHANNEL_MEANS, num_channels)
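# Illustrative sketch (hypothetical helper): runs the eval path end to end
# on a synthetic JPEG; the random image stands in for a real record. The
# result is a mean-subtracted float32 tensor of shape [224, 224, 3].
def _example_preprocess_demo():
  image = tf.cast(
      tf.random.uniform([300, 400, 3], maxval=256, dtype=tf.int32), tf.uint8)
  image_buffer = tf.io.encode_jpeg(image)
  bbox = tf.constant([0.1, 0.1, 0.9, 0.9], shape=[1, 1, 4])
  return preprocess_image(image_buffer, bbox, 224, 224, 3, is_training=False)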
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import tensorflow as tf # pylint: disable=g-bad-import-order
from absl import logging
from official.r1.resnet import imagenet_main
from official.utils.testing import integration
logging.set_verbosity(logging.ERROR)
_BATCH_SIZE = 32
_LABEL_CLASSES = 1001
class BaseTest(tf.test.TestCase):
_num_validation_images = None
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(BaseTest, cls).setUpClass()
imagenet_main.define_imagenet_flags()
def setUp(self):
super(BaseTest, self).setUp()
tf.compat.v1.disable_eager_execution()
self._num_validation_images = imagenet_main.NUM_IMAGES['validation']
imagenet_main.NUM_IMAGES['validation'] = 4
def tearDown(self):
super(BaseTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
imagenet_main.NUM_IMAGES['validation'] = self._num_validation_images
def _tensor_shapes_helper(self, resnet_size, resnet_version, dtype, with_gpu):
"""Checks the tensor shapes after each phase of the ResNet model."""
def reshape(shape):
"""Returns the expected dimensions depending on if a GPU is being used."""
# If a GPU is used for the test, the shape is returned (already in NCHW
# form). When GPU is not used, the shape is converted to NHWC.
if with_gpu:
return shape
return shape[0], shape[2], shape[3], shape[1]
graph = tf.Graph()
with graph.as_default(), self.test_session(
graph=graph, use_gpu=with_gpu, force_gpu=with_gpu):
model = imagenet_main.ImagenetModel(
resnet_size=resnet_size,
data_format='channels_first' if with_gpu else 'channels_last',
resnet_version=resnet_version,
dtype=dtype
)
inputs = tf.random.uniform([1, 224, 224, 3])
output = model(inputs, training=True)
initial_conv = graph.get_tensor_by_name('resnet_model/initial_conv:0')
max_pool = graph.get_tensor_by_name('resnet_model/initial_max_pool:0')
block_layer1 = graph.get_tensor_by_name('resnet_model/block_layer1:0')
block_layer2 = graph.get_tensor_by_name('resnet_model/block_layer2:0')
block_layer3 = graph.get_tensor_by_name('resnet_model/block_layer3:0')
block_layer4 = graph.get_tensor_by_name('resnet_model/block_layer4:0')
reduce_mean = graph.get_tensor_by_name('resnet_model/final_reduce_mean:0')
dense = graph.get_tensor_by_name('resnet_model/final_dense:0')
self.assertAllEqual(initial_conv.shape, reshape((1, 64, 112, 112)))
self.assertAllEqual(max_pool.shape, reshape((1, 64, 56, 56)))
# The number of channels after each block depends on whether we're
# using the building_block or the bottleneck_block.
if resnet_size < 50:
self.assertAllEqual(block_layer1.shape, reshape((1, 64, 56, 56)))
self.assertAllEqual(block_layer2.shape, reshape((1, 128, 28, 28)))
self.assertAllEqual(block_layer3.shape, reshape((1, 256, 14, 14)))
self.assertAllEqual(block_layer4.shape, reshape((1, 512, 7, 7)))
self.assertAllEqual(reduce_mean.shape, reshape((1, 512, 1, 1)))
else:
self.assertAllEqual(block_layer1.shape, reshape((1, 256, 56, 56)))
self.assertAllEqual(block_layer2.shape, reshape((1, 512, 28, 28)))
self.assertAllEqual(block_layer3.shape, reshape((1, 1024, 14, 14)))
self.assertAllEqual(block_layer4.shape, reshape((1, 2048, 7, 7)))
self.assertAllEqual(reduce_mean.shape, reshape((1, 2048, 1, 1)))
self.assertAllEqual(dense.shape, (1, _LABEL_CLASSES))
self.assertAllEqual(output.shape, (1, _LABEL_CLASSES))
def tensor_shapes_helper(self, resnet_size, resnet_version, with_gpu=False):
self._tensor_shapes_helper(resnet_size=resnet_size,
resnet_version=resnet_version,
dtype=tf.float32, with_gpu=with_gpu)
self._tensor_shapes_helper(resnet_size=resnet_size,
resnet_version=resnet_version,
dtype=tf.float16, with_gpu=with_gpu)
def test_tensor_shapes_resnet_18_v1(self):
self.tensor_shapes_helper(18, resnet_version=1)
def test_tensor_shapes_resnet_18_v2(self):
self.tensor_shapes_helper(18, resnet_version=2)
def test_tensor_shapes_resnet_34_v1(self):
self.tensor_shapes_helper(34, resnet_version=1)
def test_tensor_shapes_resnet_34_v2(self):
self.tensor_shapes_helper(34, resnet_version=2)
def test_tensor_shapes_resnet_50_v1(self):
self.tensor_shapes_helper(50, resnet_version=1)
def test_tensor_shapes_resnet_50_v2(self):
self.tensor_shapes_helper(50, resnet_version=2)
def test_tensor_shapes_resnet_101_v1(self):
self.tensor_shapes_helper(101, resnet_version=1)
def test_tensor_shapes_resnet_101_v2(self):
self.tensor_shapes_helper(101, resnet_version=2)
def test_tensor_shapes_resnet_152_v1(self):
self.tensor_shapes_helper(152, resnet_version=1)
def test_tensor_shapes_resnet_152_v2(self):
self.tensor_shapes_helper(152, resnet_version=2)
def test_tensor_shapes_resnet_200_v1(self):
self.tensor_shapes_helper(200, resnet_version=1)
def test_tensor_shapes_resnet_200_v2(self):
self.tensor_shapes_helper(200, resnet_version=2)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_18_with_gpu_v1(self):
self.tensor_shapes_helper(18, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_18_with_gpu_v2(self):
self.tensor_shapes_helper(18, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_34_with_gpu_v1(self):
self.tensor_shapes_helper(34, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_34_with_gpu_v2(self):
self.tensor_shapes_helper(34, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_50_with_gpu_v1(self):
self.tensor_shapes_helper(50, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_50_with_gpu_v2(self):
self.tensor_shapes_helper(50, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_101_with_gpu_v1(self):
self.tensor_shapes_helper(101, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_101_with_gpu_v2(self):
self.tensor_shapes_helper(101, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_152_with_gpu_v1(self):
self.tensor_shapes_helper(152, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_152_with_gpu_v2(self):
self.tensor_shapes_helper(152, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_200_with_gpu_v1(self):
self.tensor_shapes_helper(200, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_200_with_gpu_v2(self):
self.tensor_shapes_helper(200, resnet_version=2, with_gpu=True)
def resnet_model_fn_helper(self, mode, resnet_version, dtype):
"""Tests that the EstimatorSpec is given the appropriate arguments."""
tf.compat.v1.train.create_global_step()
input_fn = imagenet_main.get_synth_input_fn(dtype)
dataset = input_fn(True, '', _BATCH_SIZE)
iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
features, labels = iterator.get_next()
spec = imagenet_main.imagenet_model_fn(
features, labels, mode, {
'dtype': dtype,
'resnet_size': 50,
'data_format': 'channels_last',
'batch_size': _BATCH_SIZE,
'resnet_version': resnet_version,
'loss_scale': 128 if dtype == tf.float16 else 1,
'fine_tune': False,
})
predictions = spec.predictions
self.assertAllEqual(predictions['probabilities'].shape,
(_BATCH_SIZE, _LABEL_CLASSES))
self.assertEqual(predictions['probabilities'].dtype, tf.float32)
self.assertAllEqual(predictions['classes'].shape, (_BATCH_SIZE,))
self.assertEqual(predictions['classes'].dtype, tf.int64)
if mode != tf.estimator.ModeKeys.PREDICT:
loss = spec.loss
self.assertAllEqual(loss.shape, ())
self.assertEqual(loss.dtype, tf.float32)
if mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = spec.eval_metric_ops
self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
def test_resnet_model_fn_train_mode_v1(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=1,
dtype=tf.float32)
def test_resnet_model_fn_train_mode_v2(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=2,
dtype=tf.float32)
def test_resnet_model_fn_eval_mode_v1(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=1,
dtype=tf.float32)
def test_resnet_model_fn_eval_mode_v2(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=2,
dtype=tf.float32)
def test_resnet_model_fn_predict_mode_v1(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.PREDICT, resnet_version=1,
dtype=tf.float32)
def test_resnet_model_fn_predict_mode_v2(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.PREDICT, resnet_version=2,
dtype=tf.float32)
def _test_imagenetmodel_shape(self, resnet_version):
batch_size = 135
num_classes = 246
model = imagenet_main.ImagenetModel(
50, data_format='channels_last', num_classes=num_classes,
resnet_version=resnet_version)
fake_input = tf.random.uniform([batch_size, 224, 224, 3])
output = model(fake_input, training=True)
self.assertAllEqual(output.shape, (batch_size, num_classes))
def test_imagenetmodel_shape_v1(self):
self._test_imagenetmodel_shape(resnet_version=1)
def test_imagenetmodel_shape_v2(self):
self._test_imagenetmodel_shape(resnet_version=2)
def test_imagenet_end_to_end_synthetic_v1(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-batch_size', '4',
'--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v2(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-batch_size', '4',
'--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v1_tiny(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-batch_size', '4',
'-resnet_size', '18', '--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v2_tiny(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-batch_size', '4',
'-resnet_size', '18', '--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v1_huge(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-batch_size', '4',
'-resnet_size', '200', '--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v2_huge(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-batch_size', '4',
'-resnet_size', '200', '--max_train_steps', '1']
)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions for Residual Networks.
Residual networks ('v1' ResNets) were originally proposed in:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
Deep Residual Learning for Image Recognition. arXiv:1512.03385
The full preactivation 'v2' ResNet variant was introduced by:
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
The key difference of the full preactivation 'v2' variant compared to the
'v1' variant in [1] is the use of batch normalization before every weight layer
rather than after.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
_BATCH_NORM_DECAY = 0.997
_BATCH_NORM_EPSILON = 1e-5
DEFAULT_VERSION = 2
DEFAULT_DTYPE = tf.float32
CASTABLE_TYPES = (tf.float16,)
ALLOWED_TYPES = (DEFAULT_DTYPE,) + CASTABLE_TYPES
################################################################################
# Convenience functions for building the ResNet model.
################################################################################
def batch_norm(inputs, training, data_format):
"""Performs a batch normalization using a standard set of parameters."""
# We set fused=True for a significant performance boost. See
# https://www.tensorflow.org/performance/performance_guide#common_fused_ops
return tf.compat.v1.layers.batch_normalization(
inputs=inputs, axis=1 if data_format == 'channels_first' else 3,
momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True,
scale=True, training=training, fused=True)
def fixed_padding(inputs, kernel_size, data_format):
"""Pads the input along the spatial dimensions independently of input size.
Args:
inputs: A tensor of size [batch, channels, height_in, width_in] or
[batch, height_in, width_in, channels] depending on data_format.
kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
Should be a positive integer.
data_format: The input format ('channels_last' or 'channels_first').
Returns:
A tensor with the same format as the input with the data either intact
(if kernel_size == 1) or padded (if kernel_size > 1).
"""
pad_total = kernel_size - 1
pad_beg = pad_total // 2
pad_end = pad_total - pad_beg
if data_format == 'channels_first':
padded_inputs = tf.pad(tensor=inputs,
paddings=[[0, 0], [0, 0], [pad_beg, pad_end],
[pad_beg, pad_end]])
else:
padded_inputs = tf.pad(tensor=inputs,
paddings=[[0, 0], [pad_beg, pad_end],
[pad_beg, pad_end], [0, 0]])
return padded_inputs
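# Illustrative sketch (hypothetical helper): for kernel_size = 7,
# pad_total = 6, so three rows/columns are added on each side and a
# [1, 224, 224, 3] NHWC input becomes [1, 230, 230, 3].
def _example_fixed_padding_demo():
  inputs = tf.zeros([1, 224, 224, 3])
  return fixed_padding(inputs, kernel_size=7, data_format='channels_last')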
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
"""Strided 2-D convolution with explicit padding."""
# The padding is consistent and is based only on `kernel_size`, not on the
# dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
if strides > 1:
inputs = fixed_padding(inputs, kernel_size, data_format)
return tf.compat.v1.layers.conv2d(
inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
kernel_initializer=tf.compat.v1.variance_scaling_initializer(),
data_format=data_format)
################################################################################
# ResNet block definitions.
################################################################################
def _building_block_v1(inputs, filters, training, projection_shortcut, strides,
data_format):
"""A single block for ResNet v1, without a bottleneck.
Convolution then batch normalization then ReLU as described by:
Deep Residual Learning for Image Recognition
https://arxiv.org/pdf/1512.03385.pdf
by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.
Args:
inputs: A tensor of size [batch, channels, height_in, width_in] or
[batch, height_in, width_in, channels] depending on data_format.
filters: The number of filters for the convolutions.
training: A Boolean for whether the model is in training or inference
mode. Needed for batch normalization.
projection_shortcut: The function to use for projection shortcuts
(typically a 1x1 convolution when downsampling the input).
strides: The block's stride. If greater than 1, this block will ultimately
downsample the input.
data_format: The input format ('channels_last' or 'channels_first').
Returns:
The output tensor of the block; shape should match inputs.
"""
shortcut = inputs
if projection_shortcut is not None:
shortcut = projection_shortcut(inputs)
shortcut = batch_norm(inputs=shortcut, training=training,
data_format=data_format)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides,
data_format=data_format)
inputs = batch_norm(inputs, training, data_format)
inputs = tf.nn.relu(inputs)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=1,
data_format=data_format)
inputs = batch_norm(inputs, training, data_format)
inputs += shortcut
inputs = tf.nn.relu(inputs)
return inputs
def _building_block_v2(inputs, filters, training, projection_shortcut, strides,
data_format):
"""A single block for ResNet v2, without a bottleneck.
  Batch normalization then ReLU then convolution as described by:
Identity Mappings in Deep Residual Networks
https://arxiv.org/pdf/1603.05027.pdf
by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.
Args:
inputs: A tensor of size [batch, channels, height_in, width_in] or
[batch, height_in, width_in, channels] depending on data_format.
filters: The number of filters for the convolutions.
training: A Boolean for whether the model is in training or inference
mode. Needed for batch normalization.
projection_shortcut: The function to use for projection shortcuts
(typically a 1x1 convolution when downsampling the input).
strides: The block's stride. If greater than 1, this block will ultimately
downsample the input.
data_format: The input format ('channels_last' or 'channels_first').
Returns:
The output tensor of the block; shape should match inputs.
"""
shortcut = inputs
inputs = batch_norm(inputs, training, data_format)
inputs = tf.nn.relu(inputs)
# The projection shortcut should come after the first batch norm and ReLU
# since it performs a 1x1 convolution.
if projection_shortcut is not None:
shortcut = projection_shortcut(inputs)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides,
data_format=data_format)
inputs = batch_norm(inputs, training, data_format)
inputs = tf.nn.relu(inputs)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=1,
data_format=data_format)
return inputs + shortcut
def _bottleneck_block_v1(inputs, filters, training, projection_shortcut,
strides, data_format):
"""A single block for ResNet v1, with a bottleneck.
Similar to _building_block_v1(), except using the "bottleneck" blocks
described in:
Convolution then batch normalization then ReLU as described by:
Deep Residual Learning for Image Recognition
https://arxiv.org/pdf/1512.03385.pdf
by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.
Args:
inputs: A tensor of size [batch, channels, height_in, width_in] or
[batch, height_in, width_in, channels] depending on data_format.
filters: The number of filters for the convolutions.
training: A Boolean for whether the model is in training or inference
mode. Needed for batch normalization.
projection_shortcut: The function to use for projection shortcuts
(typically a 1x1 convolution when downsampling the input).
strides: The block's stride. If greater than 1, this block will ultimately
downsample the input.
data_format: The input format ('channels_last' or 'channels_first').
Returns:
The output tensor of the block; shape should match inputs.
"""
shortcut = inputs
if projection_shortcut is not None:
shortcut = projection_shortcut(inputs)
shortcut = batch_norm(inputs=shortcut, training=training,
data_format=data_format)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=1, strides=1,
data_format=data_format)
inputs = batch_norm(inputs, training, data_format)
inputs = tf.nn.relu(inputs)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides,
data_format=data_format)
inputs = batch_norm(inputs, training, data_format)
inputs = tf.nn.relu(inputs)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
data_format=data_format)
inputs = batch_norm(inputs, training, data_format)
inputs += shortcut
inputs = tf.nn.relu(inputs)
return inputs
def _bottleneck_block_v2(inputs, filters, training, projection_shortcut,
strides, data_format):
"""A single block for ResNet v2, with a bottleneck.
Similar to _building_block_v2(), except using the "bottleneck" blocks
described in:
Convolution then batch normalization then ReLU as described by:
Deep Residual Learning for Image Recognition
https://arxiv.org/pdf/1512.03385.pdf
by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.
Adapted to the ordering conventions of:
  Batch normalization then ReLU then convolution as described by:
Identity Mappings in Deep Residual Networks
https://arxiv.org/pdf/1603.05027.pdf
by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.
Args:
inputs: A tensor of size [batch, channels, height_in, width_in] or
[batch, height_in, width_in, channels] depending on data_format.
filters: The number of filters for the convolutions.
training: A Boolean for whether the model is in training or inference
mode. Needed for batch normalization.
projection_shortcut: The function to use for projection shortcuts
(typically a 1x1 convolution when downsampling the input).
strides: The block's stride. If greater than 1, this block will ultimately
downsample the input.
data_format: The input format ('channels_last' or 'channels_first').
Returns:
The output tensor of the block; shape should match inputs.
"""
shortcut = inputs
inputs = batch_norm(inputs, training, data_format)
inputs = tf.nn.relu(inputs)
# The projection shortcut should come after the first batch norm and ReLU
# since it performs a 1x1 convolution.
if projection_shortcut is not None:
shortcut = projection_shortcut(inputs)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=1, strides=1,
data_format=data_format)
inputs = batch_norm(inputs, training, data_format)
inputs = tf.nn.relu(inputs)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides,
data_format=data_format)
inputs = batch_norm(inputs, training, data_format)
inputs = tf.nn.relu(inputs)
inputs = conv2d_fixed_padding(
inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
data_format=data_format)
return inputs + shortcut
def block_layer(inputs, filters, bottleneck, block_fn, blocks, strides,
training, name, data_format):
"""Creates one layer of blocks for the ResNet model.
Args:
inputs: A tensor of size [batch, channels, height_in, width_in] or
[batch, height_in, width_in, channels] depending on data_format.
filters: The number of filters for the first convolution of the layer.
    bottleneck: Whether the blocks created are bottleneck blocks.
block_fn: The block to use within the model, either `building_block` or
`bottleneck_block`.
blocks: The number of blocks contained in the layer.
strides: The stride to use for the first convolution of the layer. If
greater than 1, this layer will ultimately downsample the input.
    training: A Boolean for whether the model is in training or inference
      mode. Needed for batch normalization.
name: A string name for the tensor output of the block layer.
data_format: The input format ('channels_last' or 'channels_first').
Returns:
The output tensor of the block layer.
"""
# Bottleneck blocks end with 4x the number of filters as they start with
filters_out = filters * 4 if bottleneck else filters
def projection_shortcut(inputs):
return conv2d_fixed_padding(
inputs=inputs, filters=filters_out, kernel_size=1, strides=strides,
data_format=data_format)
# Only the first block per block_layer uses projection_shortcut and strides
inputs = block_fn(inputs, filters, training, projection_shortcut, strides,
data_format)
for _ in range(1, blocks):
inputs = block_fn(inputs, filters, training, None, 1, data_format)
return tf.identity(inputs, name)
class Model(object):
"""Base class for building the Resnet Model."""
def __init__(self, resnet_size, bottleneck, num_classes, num_filters,
kernel_size,
conv_stride, first_pool_size, first_pool_stride,
block_sizes, block_strides,
resnet_version=DEFAULT_VERSION, data_format=None,
dtype=DEFAULT_DTYPE):
"""Creates a model for classifying an image.
Args:
resnet_size: A single integer for the size of the ResNet model.
bottleneck: Use regular blocks or bottleneck blocks.
num_classes: The number of classes used as labels.
num_filters: The number of filters to use for the first block layer
of the model. This number is then doubled for each subsequent block
layer.
kernel_size: The kernel size to use for convolution.
conv_stride: stride size for the initial convolutional layer
first_pool_size: Pool size to be used for the first pooling layer.
If none, the first pooling layer is skipped.
first_pool_stride: stride size for the first pooling layer. Not used
if first_pool_size is None.
block_sizes: A list containing n values, where n is the number of sets of
block layers desired. Each value should be the number of blocks in the
i-th set.
block_strides: List of integers representing the desired stride size for
each of the sets of block layers. Should be same length as block_sizes.
resnet_version: Integer representing which version of the ResNet network
to use. See README for details. Valid values: [1, 2]
data_format: Input format ('channels_last', 'channels_first', or None).
If set to None, the format is dependent on whether a GPU is available.
dtype: The TensorFlow dtype to use for calculations. If not specified
tf.float32 is used.
Raises:
ValueError: if invalid version is selected.
"""
self.resnet_size = resnet_size
if not data_format:
data_format = ('channels_first' if tf.config.list_physical_devices('GPU')
else 'channels_last')
self.resnet_version = resnet_version
if resnet_version not in (1, 2):
raise ValueError(
'Resnet version should be 1 or 2. See README for citations.')
self.bottleneck = bottleneck
if bottleneck:
if resnet_version == 1:
self.block_fn = _bottleneck_block_v1
else:
self.block_fn = _bottleneck_block_v2
else:
if resnet_version == 1:
self.block_fn = _building_block_v1
else:
self.block_fn = _building_block_v2
if dtype not in ALLOWED_TYPES:
raise ValueError('dtype must be one of: {}'.format(ALLOWED_TYPES))
self.data_format = data_format
self.num_classes = num_classes
self.num_filters = num_filters
self.kernel_size = kernel_size
self.conv_stride = conv_stride
self.first_pool_size = first_pool_size
self.first_pool_stride = first_pool_stride
self.block_sizes = block_sizes
self.block_strides = block_strides
self.dtype = dtype
self.pre_activation = resnet_version == 2
def _custom_dtype_getter(self, getter, name, shape=None, dtype=DEFAULT_DTYPE,
*args, **kwargs):
"""Creates variables in fp32, then casts to fp16 if necessary.
This function is a custom getter. A custom getter is a function with the
same signature as tf.get_variable, except it has an additional getter
parameter. Custom getters can be passed as the `custom_getter` parameter of
tf.variable_scope. Then, tf.get_variable will call the custom getter,
instead of directly getting a variable itself. This can be used to change
the types of variables that are retrieved with tf.get_variable.
    The `getter` parameter is the underlying variable getter that would have
been called if no custom getter was used. Custom getters typically get a
variable with `getter`, then modify it in some way.
This custom getter will create an fp32 variable. If a low precision
(e.g. float16) variable was requested it will then cast the variable to the
requested dtype. The reason we do not directly create variables in low
precision dtypes is that applying small gradients to such variables may
cause the variable not to change.
Args:
getter: The underlying variable getter, that has the same signature as
tf.get_variable and returns a variable.
name: The name of the variable to get.
shape: The shape of the variable to get.
dtype: The dtype of the variable to get. Note that if this is a low
precision dtype, the variable will be created as a tf.float32 variable,
        then cast to the appropriate dtype.
*args: Additional arguments to pass unmodified to getter.
**kwargs: Additional keyword arguments to pass unmodified to getter.
Returns:
A variable which is cast to fp16 if necessary.
"""
if dtype in CASTABLE_TYPES:
var = getter(name, shape, tf.float32, *args, **kwargs)
return tf.cast(var, dtype=dtype, name=name + '_cast')
else:
return getter(name, shape, dtype, *args, **kwargs)
def _model_variable_scope(self):
"""Returns a variable scope that the model should be created under.
    If self.dtype is a castable type, model variables will be created in fp32
then cast to self.dtype before being used.
Returns:
A variable scope for the model.
"""
return tf.compat.v1.variable_scope('resnet_model',
custom_getter=self._custom_dtype_getter)
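  # Illustrative sketch (hypothetical method, not part of the original
  # class): exercises the custom getter in isolation under graph mode. The
  # fp16 tensor that comes back is a cast of an underlying fp32 variable,
  # which keeps small gradient updates from being lost to rounding.
  def _example_custom_getter_demo(self):
    with tf.Graph().as_default():
      with self._model_variable_scope():
        weights = tf.compat.v1.get_variable(
            'example_w', [4], dtype=tf.float16)
      return weights.dtype  # tf.float16, backed by an fp32 variable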
def __call__(self, inputs, training):
"""Add operations to classify a batch of input images.
Args:
inputs: A Tensor representing a batch of input images.
training: A boolean. Set to True to add operations required only when
training the classifier.
Returns:
A logits Tensor with shape [<batch_size>, self.num_classes].
"""
with self._model_variable_scope():
if self.data_format == 'channels_first':
# Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
# This provides a large performance boost on GPU. See
# https://www.tensorflow.org/performance/performance_guide#data_formats
inputs = tf.transpose(a=inputs, perm=[0, 3, 1, 2])
inputs = conv2d_fixed_padding(
inputs=inputs, filters=self.num_filters, kernel_size=self.kernel_size,
strides=self.conv_stride, data_format=self.data_format)
inputs = tf.identity(inputs, 'initial_conv')
# We do not include batch normalization or activation functions in V2
# for the initial conv1 because the first ResNet unit will perform these
# for both the shortcut and non-shortcut paths as part of the first
# block's projection. Cf. Appendix of [2].
if self.resnet_version == 1:
inputs = batch_norm(inputs, training, self.data_format)
inputs = tf.nn.relu(inputs)
if self.first_pool_size:
inputs = tf.compat.v1.layers.max_pooling2d(
inputs=inputs, pool_size=self.first_pool_size,
strides=self.first_pool_stride, padding='SAME',
data_format=self.data_format)
inputs = tf.identity(inputs, 'initial_max_pool')
for i, num_blocks in enumerate(self.block_sizes):
num_filters = self.num_filters * (2**i)
inputs = block_layer(
inputs=inputs, filters=num_filters, bottleneck=self.bottleneck,
block_fn=self.block_fn, blocks=num_blocks,
strides=self.block_strides[i], training=training,
name='block_layer{}'.format(i + 1), data_format=self.data_format)
      # Only apply BN and ReLU for models that do pre-activation in each
      # building/bottleneck block, e.g. ResNet v2.
if self.pre_activation:
inputs = batch_norm(inputs, training, self.data_format)
inputs = tf.nn.relu(inputs)
# The current top layer has shape
# `batch_size x pool_size x pool_size x final_size`.
# ResNet does an Average Pooling layer over pool_size,
# but that is the same as doing a reduce_mean. We do a reduce_mean
# here because it performs better than AveragePooling2D.
axes = [2, 3] if self.data_format == 'channels_first' else [1, 2]
inputs = tf.reduce_mean(input_tensor=inputs, axis=axes, keepdims=True)
inputs = tf.identity(inputs, 'final_reduce_mean')
inputs = tf.squeeze(inputs, axes)
inputs = tf.compat.v1.layers.dense(inputs=inputs, units=self.num_classes)
inputs = tf.identity(inputs, 'final_dense')
return inputs
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains utility and supporting functions for ResNet.
This module contains ResNet code which does not directly build layers. This
includes dataset management, hyperparameter and optimizer code, and argument
parsing. Code for defining the ResNet layers can be found in resnet_model.py.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import math
import multiprocessing
import os
from absl import flags
from absl import logging
import tensorflow as tf
from official.r1.resnet import imagenet_preprocessing
from official.r1.resnet import resnet_model
from official.r1.utils import export
from official.r1.utils.logs import hooks_helper
from official.r1.utils.logs import logger
from official.utils.flags import core as flags_core
from official.utils.misc import distribution_utils
from official.utils.misc import model_helpers
################################################################################
# Functions for input processing.
################################################################################
def process_record_dataset(dataset,
is_training,
batch_size,
shuffle_buffer,
parse_record_fn,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
drop_remainder=False,
tf_data_experimental_slack=False):
"""Given a Dataset with raw records, return an iterator over the records.
Args:
dataset: A Dataset representing raw records
is_training: A boolean denoting whether the input is for training.
batch_size: The number of samples per batch.
shuffle_buffer: The buffer size to use when shuffling records. A larger
value results in better randomness, but smaller values reduce startup
time and use less memory.
parse_record_fn: A function that takes a raw record and returns the
corresponding (image, label) pair.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features.
datasets_num_private_threads: Number of threads for a private
threadpool created for all datasets computation.
drop_remainder: A boolean indicating whether to drop the remainder of the
batches. If True, the batch dimension will be static.
tf_data_experimental_slack: Whether to enable tf.data's
`experimental_slack` option.
Returns:
Dataset of (image, label) pairs ready for iteration.
"""
# Defines a specific size thread pool for tf.data operations.
if datasets_num_private_threads:
options = tf.data.Options()
options.experimental_threading.private_threadpool_size = (
datasets_num_private_threads)
dataset = dataset.with_options(options)
logging.info('datasets_num_private_threads: %s',
datasets_num_private_threads)
# Disable intra-op parallelism to optimize for throughput instead of latency.
options = tf.data.Options()
options.experimental_threading.max_intra_op_parallelism = 1
dataset = dataset.with_options(options)
# Prefetches a batch at a time to smooth out the time taken to load input
# files for shuffling and processing.
dataset = dataset.prefetch(buffer_size=batch_size)
if is_training:
# Shuffles records before repeating to respect epoch boundaries.
dataset = dataset.shuffle(buffer_size=shuffle_buffer)
# Repeats the dataset for the number of epochs to train.
dataset = dataset.repeat(num_epochs)
# Parses the raw records into images and labels.
dataset = dataset.map(
lambda value: parse_record_fn(value, is_training, dtype),
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
# Operations between the final prefetch and the get_next call to the iterator
# will happen synchronously during run time. We prefetch here again to
# background all of the above processing work and keep it out of the
# critical training path. Setting buffer_size to tf.data.experimental.AUTOTUNE
# allows DistributionStrategies to adjust how many batches to fetch based
# on how many devices are present.
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
if tf_data_experimental_slack:
options = tf.data.Options()
options.experimental_slack = True
dataset = dataset.with_options(options)
return dataset
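# Illustrative usage sketch (not part of the original file; the file name and
# `my_parse_record_fn` are hypothetical):
#
#   raw = tf.data.TFRecordDataset('train-00000-of-01024.tfrecord')
#   dataset = process_record_dataset(
#       raw, is_training=True, batch_size=128, shuffle_buffer=10000,
#       parse_record_fn=my_parse_record_fn, num_epochs=90)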
def get_synth_input_fn(height, width, num_channels, num_classes,
dtype=tf.float32):
"""Returns an input function that returns a dataset with random data.
This input_fn returns a data set that iterates over a set of random data and
bypasses all preprocessing, e.g. jpeg decode and copy. The host to device
copy is still included. This is used to find the upper throughput bound when
tuning the full input pipeline.
Args:
height: Integer height that will be used to create a fake image tensor.
width: Integer width that will be used to create a fake image tensor.
num_channels: Integer depth that will be used to create a fake image tensor.
num_classes: Number of classes that should be represented in the fake labels
tensor.
dtype: Data type for features/images.
Returns:
An input_fn that can be used in place of a real one to return a dataset
that can be used for iteration.
"""
# pylint: disable=unused-argument
def input_fn(is_training, data_dir, batch_size, *args, **kwargs):
"""Returns dataset filled with random data."""
# Synthetic input should be within [0, 255].
inputs = tf.random.truncated_normal(
[batch_size] + [height, width, num_channels],
dtype=dtype,
mean=127,
stddev=60,
name='synthetic_inputs')
    labels = tf.random.uniform(
        [batch_size],
        minval=0,
        # Note: maxval is exclusive, so this covers labels in [0, num_classes).
        maxval=num_classes,
        dtype=tf.int32,
        name='synthetic_labels')
data = tf.data.Dataset.from_tensors((inputs, labels)).repeat()
data = data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
return data
return input_fn
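# Illustrative usage sketch (argument values are assumptions, not defaults):
#
#   synth_input_fn = get_synth_input_fn(224, 224, 3, 1001)
#   dataset = synth_input_fn(is_training=True, data_dir=None, batch_size=32)
#   # `dataset` yields (image, label) batches of random data indefinitely.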
def image_bytes_serving_input_fn(image_shape, dtype=tf.float32):
"""Serving input fn for raw jpeg images."""
def _preprocess_image(image_bytes):
"""Preprocess a single raw image."""
# Bounding box around the whole image.
bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=dtype, shape=[1, 1, 4])
height, width, num_channels = image_shape
image = imagenet_preprocessing.preprocess_image(
image_bytes, bbox, height, width, num_channels, is_training=False)
return image
image_bytes_list = tf.compat.v1.placeholder(
shape=[None], dtype=tf.string, name='input_tensor')
images = tf.map_fn(
_preprocess_image, image_bytes_list, back_prop=False, dtype=dtype)
return tf.estimator.export.TensorServingInputReceiver(
images, {'image_bytes': image_bytes_list})
def override_flags_and_set_envars_for_gpu_thread_pool(flags_obj):
"""Override flags and set env_vars for performance.
These settings exist to test the difference between using stock settings
and manual tuning. It also shows some of the ENV_VARS that can be tweaked to
squeeze a few extra examples per second. These settings are defaulted to the
current platform of interest, which changes over time.
On systems with small numbers of cpu cores, e.g. under 8 logical cores,
setting up a gpu thread pool with `tf_gpu_thread_mode=gpu_private` may perform
poorly.
Args:
flags_obj: Current flags, which may be adjusted, possibly overriding values
set by the user on the command line.
"""
cpu_count = multiprocessing.cpu_count()
logging.info('Logical CPU cores: %s', cpu_count)
# Sets up thread pool for each GPU for op scheduling.
per_gpu_thread_count = 1
total_gpu_thread_count = per_gpu_thread_count * flags_obj.num_gpus
os.environ['TF_GPU_THREAD_MODE'] = flags_obj.tf_gpu_thread_mode
os.environ['TF_GPU_THREAD_COUNT'] = str(per_gpu_thread_count)
logging.info('TF_GPU_THREAD_COUNT: %s', os.environ['TF_GPU_THREAD_COUNT'])
logging.info('TF_GPU_THREAD_MODE: %s', os.environ['TF_GPU_THREAD_MODE'])
# Reduces general thread pool by number of threads used for GPU pool.
main_thread_count = cpu_count - total_gpu_thread_count
flags_obj.inter_op_parallelism_threads = main_thread_count
# Sets the thread count for tf.data: logical cores minus the threads assigned
# to the private GPU pool, minus 2 threads per GPU for event monitoring and
# sending / receiving tensors.
num_monitoring_threads = 2 * flags_obj.num_gpus
flags_obj.datasets_num_private_threads = (cpu_count - total_gpu_thread_count
- num_monitoring_threads)
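# Worked example (illustrative, assuming 16 logical cores and 2 GPUs): the
# private GPU pool gets 1 thread per GPU, so inter_op_parallelism_threads
# becomes 16 - 2 = 14 and datasets_num_private_threads becomes
# 16 - 2 - (2 * 2) = 10.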
################################################################################
# Functions for running training/eval/validation loops for the model.
################################################################################
def learning_rate_with_decay(
batch_size, batch_denom, num_images, boundary_epochs, decay_rates,
base_lr=0.1, warmup=False):
"""Get a learning rate that decays step-wise as training progresses.
Args:
batch_size: the number of examples processed in each training batch.
batch_denom: this value will be used to scale the base learning rate.
`0.1 * batch size` is divided by this number, such that when
batch_denom == batch_size, the initial learning rate will be 0.1.
num_images: total number of images that will be used for training.
boundary_epochs: list of ints representing the epochs at which we
decay the learning rate.
decay_rates: list of floats representing the decay rates to be used
for scaling the learning rate. It should have one more element
than `boundary_epochs`, and all elements should have the same type.
base_lr: Initial learning rate scaled based on batch_denom.
warmup: Run a 5 epoch warmup to the initial lr.
Returns:
A function that takes a single argument (the number of batches trained so
far, i.e. global_step) and returns the learning rate to be used for
training the next batch.
"""
initial_learning_rate = base_lr * batch_size / batch_denom
batches_per_epoch = num_images / batch_size
# Reduce the learning rate at certain epochs.
# CIFAR-10: divide by 10 at epoch 100, 150, and 200
# ImageNet: divide by 10 at epoch 30, 60, 80, and 90
boundaries = [int(batches_per_epoch * epoch) for epoch in boundary_epochs]
vals = [initial_learning_rate * decay for decay in decay_rates]
def learning_rate_fn(global_step):
"""Builds scaled learning rate function with 5 epoch warm up."""
lr = tf.compat.v1.train.piecewise_constant(global_step, boundaries, vals)
if warmup:
warmup_steps = int(batches_per_epoch * 5)
warmup_lr = (
initial_learning_rate * tf.cast(global_step, tf.float32) / tf.cast(
warmup_steps, tf.float32))
return tf.cond(pred=global_step < warmup_steps,
true_fn=lambda: warmup_lr,
false_fn=lambda: lr)
return lr
def poly_rate_fn(global_step):
"""Handles linear scaling rule, gradual warmup, and LR decay.
The learning rate starts at 0, then increases linearly per step. After the
warmup period (set from the batch size below), it reaches the base learning
rate (scaled to account for batch size). The learning rate is then decayed
using a polynomial rate decay schedule with power 2.0.
Args:
global_step: the current global_step
Returns:
The current learning rate.
"""
# Learning rate schedule for LARS polynomial schedule
if flags.FLAGS.batch_size < 8192:
plr = 5.0
w_epochs = 5
elif flags.FLAGS.batch_size < 16384:
plr = 10.0
w_epochs = 5
elif flags.FLAGS.batch_size < 32768:
plr = 25.0
w_epochs = 5
else:
plr = 32.0
w_epochs = 14
w_steps = int(w_epochs * batches_per_epoch)
wrate = (plr * tf.cast(global_step, tf.float32) / tf.cast(
w_steps, tf.float32))
# TODO(pkanwar): use a flag to help calc num_epochs.
num_epochs = 90
train_steps = batches_per_epoch * num_epochs
min_step = tf.constant(1, dtype=tf.int64)
decay_steps = tf.maximum(min_step, tf.subtract(global_step, w_steps))
poly_rate = tf.train.polynomial_decay(
plr,
decay_steps,
train_steps - w_steps + 1,
power=2.0)
return tf.where(global_step <= w_steps, wrate, poly_rate)
# For LARS we have a new learning rate schedule
if flags.FLAGS.enable_lars:
return poly_rate_fn
return learning_rate_fn
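# Illustrative sketch (values mirror the ImageNet comment above and are an
# assumption, not part of this file): a schedule that divides the learning
# rate by 10 at epochs 30, 60, 80, and 90 for a global batch size of 256:
#
#   lr_fn = learning_rate_with_decay(
#       batch_size=256, batch_denom=256, num_images=1281167,
#       boundary_epochs=[30, 60, 80, 90],
#       decay_rates=[1, 0.1, 0.01, 1e-3, 1e-4])
#   # lr_fn(global_step) then returns a scalar learning-rate tensor.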
def per_replica_batch_size(batch_size, num_gpus):
"""For multi-gpu, batch-size must be a multiple of the number of GPUs.
Note that distribution strategy handles this automatically when used with
Keras. When using Estimator, we need to compute the per-GPU batch size.
Args:
batch_size: Global batch size to be divided among devices. This should be
equal to num_gpus times the single-GPU batch_size for multi-gpu training.
num_gpus: How many GPUs are used with DistributionStrategies.
Returns:
Batch size per device.
Raises:
ValueError: if batch_size is not divisible by number of devices
"""
if num_gpus <= 1:
return batch_size
remainder = batch_size % num_gpus
if remainder:
err = ('When running with multiple GPUs, batch size '
'must be a multiple of the number of available GPUs. Found {} '
'GPUs with a batch size of {}; try --batch_size={} instead.'
).format(num_gpus, batch_size, batch_size - remainder)
raise ValueError(err)
return int(batch_size / num_gpus)
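# Worked examples (illustrative):
#
#   per_replica_batch_size(256, num_gpus=8)  # -> 32
#   per_replica_batch_size(250, num_gpus=8)  # raises ValueError, suggesting
#                                            # --batch_size=248.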
def resnet_model_fn(features, labels, mode, model_class,
resnet_size, weight_decay, learning_rate_fn, momentum,
data_format, resnet_version, loss_scale,
loss_filter_fn=None, dtype=resnet_model.DEFAULT_DTYPE,
fine_tune=False, label_smoothing=0.0):
"""Shared functionality for different resnet model_fns.
Initializes the ResnetModel representing the model layers
and uses that model to build the necessary EstimatorSpecs for
the `mode` in question. For training, this means building losses,
the optimizer, and the train op that get passed into the EstimatorSpec.
For evaluation and prediction, the EstimatorSpec is returned without
a train op, but with the necessary parameters for the given mode.
Args:
features: tensor representing input images
labels: tensor representing class labels for all input images
mode: current estimator mode; should be one of
`tf.estimator.ModeKeys.TRAIN`, `EVAL`, or `PREDICT`
model_class: a class representing a TensorFlow model that has a __call__
function. We assume here that this is a subclass of ResnetModel.
resnet_size: A single integer for the size of the ResNet model.
weight_decay: weight decay loss rate used to regularize learned variables.
learning_rate_fn: function that returns the current learning rate given
the current global_step
momentum: momentum term used for optimization
data_format: Input format ('channels_last', 'channels_first', or None).
If set to None, the format is dependent on whether a GPU is available.
resnet_version: Integer representing which version of the ResNet network to
use. See README for details. Valid values: [1, 2]
loss_scale: The factor to scale the loss for numerical stability. A detailed
summary is present in the arg parser help text.
loss_filter_fn: function that takes a string variable name and returns
True if the var should be included in loss calculation, and False
otherwise. If None, batch_normalization variables will be excluded
from the loss.
dtype: the TensorFlow dtype to use for calculations.
fine_tune: If True, only train the dense (final) layers.
label_smoothing: If greater than 0 then smooth the labels.
Returns:
EstimatorSpec parameterized according to the input params and the
current mode.
"""
# Generate a summary node for the images
tf.compat.v1.summary.image('images', features, max_outputs=6)
# Checks that the features/images have the same data type as the one used
# for calculations.
assert features.dtype == dtype
model = model_class(resnet_size, data_format, resnet_version=resnet_version,
dtype=dtype)
logits = model(features, mode == tf.estimator.ModeKeys.TRAIN)
# This acts as a no-op if the logits are already in fp32 (provided logits are
# not a SparseTensor). If dtype is low precision, logits must be cast to
# fp32 for numerical stability.
logits = tf.cast(logits, tf.float32)
predictions = {
'classes': tf.argmax(input=logits, axis=1),
'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
}
if mode == tf.estimator.ModeKeys.PREDICT:
# Return the predictions and the specification for serving a SavedModel
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=predictions,
export_outputs={
'predict': tf.estimator.export.PredictOutput(predictions)
})
# Calculate loss, which includes softmax cross entropy and L2 regularization.
if label_smoothing != 0.0:
one_hot_labels = tf.one_hot(labels, 1001)
cross_entropy = tf.losses.softmax_cross_entropy(
logits=logits, onehot_labels=one_hot_labels,
label_smoothing=label_smoothing)
else:
cross_entropy = tf.compat.v1.losses.sparse_softmax_cross_entropy(
logits=logits, labels=labels)
# Create a tensor named cross_entropy for logging purposes.
tf.identity(cross_entropy, name='cross_entropy')
tf.compat.v1.summary.scalar('cross_entropy', cross_entropy)
# If no loss_filter_fn is passed, assume we want the default behavior,
# which is that batch_normalization variables are excluded from loss.
def exclude_batch_norm(name):
return 'batch_normalization' not in name
loss_filter_fn = loss_filter_fn or exclude_batch_norm
# Add weight decay to the loss.
l2_loss = weight_decay * tf.add_n(
# loss is computed using fp32 for numerical stability.
[
tf.nn.l2_loss(tf.cast(v, tf.float32))
for v in tf.compat.v1.trainable_variables()
if loss_filter_fn(v.name)
])
tf.compat.v1.summary.scalar('l2_loss', l2_loss)
loss = cross_entropy + l2_loss
if mode == tf.estimator.ModeKeys.TRAIN:
global_step = tf.compat.v1.train.get_or_create_global_step()
learning_rate = learning_rate_fn(global_step)
# Create a tensor named learning_rate for logging purposes
tf.identity(learning_rate, name='learning_rate')
tf.compat.v1.summary.scalar('learning_rate', learning_rate)
if flags.FLAGS.enable_lars:
from tensorflow.contrib import opt as contrib_opt # pylint: disable=g-import-not-at-top
optimizer = contrib_opt.LARSOptimizer(
learning_rate,
momentum=momentum,
weight_decay=weight_decay,
skip_list=['batch_normalization', 'bias'])
else:
optimizer = tf.compat.v1.train.MomentumOptimizer(
learning_rate=learning_rate,
momentum=momentum
)
fp16_implementation = getattr(flags.FLAGS, 'fp16_implementation', None)
if fp16_implementation == 'graph_rewrite':
optimizer = (
tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite(
optimizer, loss_scale=loss_scale))
def _dense_grad_filter(gvs):
"""Only apply gradient updates to the final layer.
This function is used for fine tuning.
Args:
gvs: list of tuples with gradients and variable info
Returns:
filtered gradients so that only the dense layer remains
"""
return [(g, v) for g, v in gvs if 'dense' in v.name]
if loss_scale != 1 and fp16_implementation != 'graph_rewrite':
# When computing fp16 gradients, often intermediate tensor values are
# so small, they underflow to 0. To avoid this, we multiply the loss by
# loss_scale to make these tensor values loss_scale times bigger.
scaled_grad_vars = optimizer.compute_gradients(loss * loss_scale)
if fine_tune:
scaled_grad_vars = _dense_grad_filter(scaled_grad_vars)
# Once the gradient computation is complete we can scale the gradients
# back to the correct scale before passing them to the optimizer.
unscaled_grad_vars = [(grad / loss_scale, var)
for grad, var in scaled_grad_vars]
minimize_op = optimizer.apply_gradients(unscaled_grad_vars, global_step)
else:
grad_vars = optimizer.compute_gradients(loss)
if fine_tune:
grad_vars = _dense_grad_filter(grad_vars)
minimize_op = optimizer.apply_gradients(grad_vars, global_step)
update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
train_op = tf.group(minimize_op, update_ops)
else:
train_op = None
accuracy = tf.compat.v1.metrics.accuracy(labels, predictions['classes'])
accuracy_top_5 = tf.compat.v1.metrics.mean(
tf.nn.in_top_k(predictions=logits, targets=labels, k=5, name='top_5_op'))
metrics = {'accuracy': accuracy,
'accuracy_top_5': accuracy_top_5}
# Create a tensor named train_accuracy for logging purposes
tf.identity(accuracy[1], name='train_accuracy')
tf.identity(accuracy_top_5[1], name='train_accuracy_top_5')
tf.compat.v1.summary.scalar('train_accuracy', accuracy[1])
tf.compat.v1.summary.scalar('train_accuracy_top_5', accuracy_top_5[1])
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=predictions,
loss=loss,
train_op=train_op,
eval_metric_ops=metrics)
def resnet_main(
flags_obj, model_function, input_function, dataset_name, shape=None):
"""Shared main loop for ResNet Models.
Args:
flags_obj: An object containing parsed flags. See define_resnet_flags()
for details.
model_function: the function that instantiates the Model and builds the
ops for train/eval. This will be passed directly into the estimator.
input_function: the function that processes the dataset and returns a
dataset that the estimator can train on. This will be wrapped with
all the relevant flags for running and passed to estimator.
dataset_name: the name of the dataset for training and evaluation. This is
used for logging purpose.
shape: list of ints representing the shape of the images used for training.
This is only used if flags_obj.export_dir is passed.
Returns:
Dict of results of the run. Contains the keys `eval_results` and
`train_hooks`. `eval_results` contains accuracy (top_1) and accuracy_top_5.
`train_hooks` is a list of the hook instances used during training.
"""
model_helpers.apply_clean(flags.FLAGS)
# Ensures flag override logic is only executed if explicitly triggered.
if flags_obj.tf_gpu_thread_mode:
override_flags_and_set_envars_for_gpu_thread_pool(flags_obj)
# Configures cluster spec for distribution strategy.
num_workers = distribution_utils.configure_cluster(flags_obj.worker_hosts,
flags_obj.task_index)
# Creates session config. allow_soft_placement = True, is required for
# multi-GPU and is not harmful for other modes.
session_config = tf.compat.v1.ConfigProto(
inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
allow_soft_placement=True)
distribution_strategy = distribution_utils.get_distribution_strategy(
distribution_strategy=flags_obj.distribution_strategy,
num_gpus=flags_core.get_num_gpus(flags_obj),
all_reduce_alg=flags_obj.all_reduce_alg,
num_packs=flags_obj.num_packs)
# Creates a `RunConfig` that checkpoints every 24 hours which essentially
# results in checkpoints determined only by `epochs_between_evals`.
run_config = tf.estimator.RunConfig(
train_distribute=distribution_strategy,
session_config=session_config,
save_checkpoints_secs=60*60*24,
save_checkpoints_steps=None)
# Initializes model with all but the dense layer from pretrained ResNet.
if flags_obj.pretrained_model_checkpoint_path is not None:
warm_start_settings = tf.estimator.WarmStartSettings(
flags_obj.pretrained_model_checkpoint_path,
vars_to_warm_start='^(?!.*dense)')
else:
warm_start_settings = None
classifier = tf.estimator.Estimator(
model_fn=model_function, model_dir=flags_obj.model_dir, config=run_config,
warm_start_from=warm_start_settings, params={
'resnet_size': int(flags_obj.resnet_size),
'data_format': flags_obj.data_format,
'batch_size': flags_obj.batch_size,
'resnet_version': int(flags_obj.resnet_version),
'loss_scale': flags_core.get_loss_scale(flags_obj,
default_for_fp16=128),
'dtype': flags_core.get_tf_dtype(flags_obj),
'fine_tune': flags_obj.fine_tune,
'num_workers': num_workers,
})
run_params = {
'batch_size': flags_obj.batch_size,
'dtype': flags_core.get_tf_dtype(flags_obj),
'resnet_size': flags_obj.resnet_size,
'resnet_version': flags_obj.resnet_version,
'synthetic_data': flags_obj.use_synthetic_data,
'train_epochs': flags_obj.train_epochs,
'num_workers': num_workers,
}
if flags_obj.use_synthetic_data:
dataset_name = dataset_name + '-synthetic'
benchmark_logger = logger.get_benchmark_logger()
benchmark_logger.log_run_info('resnet', dataset_name, run_params,
test_id=flags_obj.benchmark_test_id)
train_hooks = hooks_helper.get_train_hooks(
flags_obj.hooks,
model_dir=flags_obj.model_dir,
batch_size=flags_obj.batch_size)
def input_fn_train(num_epochs, input_context=None):
return input_function(
is_training=True,
data_dir=flags_obj.data_dir,
batch_size=per_replica_batch_size(
flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
num_epochs=num_epochs,
dtype=flags_core.get_tf_dtype(flags_obj),
datasets_num_private_threads=flags_obj.datasets_num_private_threads,
input_context=input_context)
def input_fn_eval():
return input_function(
is_training=False,
data_dir=flags_obj.data_dir,
batch_size=per_replica_batch_size(
flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
num_epochs=1,
dtype=flags_core.get_tf_dtype(flags_obj))
train_epochs = (0 if flags_obj.eval_only or not flags_obj.train_epochs else
flags_obj.train_epochs)
use_train_and_evaluate = flags_obj.use_train_and_evaluate or num_workers > 1
if use_train_and_evaluate:
train_spec = tf.estimator.TrainSpec(
input_fn=lambda input_context=None: input_fn_train(
train_epochs, input_context=input_context),
hooks=train_hooks,
max_steps=flags_obj.max_train_steps)
eval_spec = tf.estimator.EvalSpec(input_fn=input_fn_eval)
logging.info('Starting to train and evaluate.')
tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
    # tf.estimator.train_and_evaluate doesn't return anything in multi-worker
# case.
eval_results = {}
else:
if train_epochs == 0:
# If --eval_only is set, perform a single loop with zero train epochs.
schedule, n_loops = [0], 1
else:
# Compute the number of times to loop while training. All but the last
# pass will train for `epochs_between_evals` epochs, while the last will
# train for the number needed to reach `training_epochs`. For instance if
# train_epochs = 25 and epochs_between_evals = 10
# schedule will be set to [10, 10, 5]. That is to say, the loop will:
# Train for 10 epochs and then evaluate.
# Train for another 10 epochs and then evaluate.
# Train for a final 5 epochs (to reach 25 epochs) and then evaluate.
n_loops = math.ceil(train_epochs / flags_obj.epochs_between_evals)
schedule = [flags_obj.epochs_between_evals for _ in range(int(n_loops))]
      schedule[-1] = train_epochs - sum(schedule[:-1])  # Avoid over-counting.
for cycle_index, num_train_epochs in enumerate(schedule):
logging.info('Starting cycle: %d/%d', cycle_index, int(n_loops))
if num_train_epochs:
# Since we are calling classifier.train immediately in each loop, the
# value of num_train_epochs in the lambda function will not be changed
# before it is used. So it is safe to ignore the pylint error here
# pylint: disable=cell-var-from-loop
classifier.train(
input_fn=lambda input_context=None: input_fn_train(
num_train_epochs, input_context=input_context),
hooks=train_hooks,
max_steps=flags_obj.max_train_steps)
# flags_obj.max_train_steps is generally associated with testing and
# profiling. As a result it is frequently called with synthetic data,
# which will iterate forever. Passing steps=flags_obj.max_train_steps
# allows the eval (which is generally unimportant in those circumstances)
# to terminate. Note that eval will run for max_train_steps each loop,
# regardless of the global_step count.
logging.info('Starting to evaluate.')
eval_results = classifier.evaluate(input_fn=input_fn_eval,
steps=flags_obj.max_train_steps)
benchmark_logger.log_evaluation_result(eval_results)
if model_helpers.past_stop_threshold(
flags_obj.stop_threshold, eval_results['accuracy']):
break
if flags_obj.export_dir is not None:
# Exports a saved model for the given classifier.
export_dtype = flags_core.get_tf_dtype(flags_obj)
if flags_obj.image_bytes_as_serving_input:
input_receiver_fn = functools.partial(
image_bytes_serving_input_fn, shape, dtype=export_dtype)
else:
input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
shape, batch_size=flags_obj.batch_size, dtype=export_dtype)
classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn,
strip_default_attrs=True)
stats = {}
stats['eval_results'] = eval_results
stats['train_hooks'] = train_hooks
return stats
def define_resnet_flags(resnet_size_choices=None, dynamic_loss_scale=False,
fp16_implementation=False):
"""Add flags and validators for ResNet."""
flags_core.define_base(clean=True, train_epochs=True,
epochs_between_evals=True, stop_threshold=True,
num_gpu=True, hooks=True, export_dir=True,
distribution_strategy=True)
flags_core.define_performance(num_parallel_calls=False,
inter_op=True,
intra_op=True,
synthetic_data=True,
dtype=True,
all_reduce_alg=True,
num_packs=True,
tf_gpu_thread_mode=True,
datasets_num_private_threads=True,
dynamic_loss_scale=dynamic_loss_scale,
fp16_implementation=fp16_implementation,
loss_scale=True,
tf_data_experimental_slack=True,
max_train_steps=True)
flags_core.define_image()
flags_core.define_benchmark()
flags_core.define_distribution()
flags.adopt_module_key_flags(flags_core)
flags.DEFINE_enum(
name='resnet_version', short_name='rv', default='1',
enum_values=['1', '2'],
help=flags_core.help_wrap(
'Version of ResNet. (1 or 2) See README.md for details.'))
flags.DEFINE_bool(
name='fine_tune', short_name='ft', default=False,
help=flags_core.help_wrap(
'If True do not train any parameters except for the final layer.'))
flags.DEFINE_string(
name='pretrained_model_checkpoint_path', short_name='pmcp', default=None,
help=flags_core.help_wrap(
        'If not None, initialize all of the network except the final layer '
        'with these values.'))
flags.DEFINE_boolean(
name='eval_only', default=False,
help=flags_core.help_wrap('Skip training and only perform evaluation on '
'the latest checkpoint.'))
flags.DEFINE_boolean(
name='image_bytes_as_serving_input', default=False,
help=flags_core.help_wrap(
'If True exports savedmodel with serving signature that accepts '
'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
'represents the image. The former is easier to use for serving at '
'the expense of image resize/cropping being done as part of model '
'inference. Note, this flag only applies to ImageNet and cannot '
'be used for CIFAR.'))
flags.DEFINE_boolean(
name='use_train_and_evaluate', default=False,
help=flags_core.help_wrap(
'If True, uses `tf.estimator.train_and_evaluate` for the training '
        'and evaluation loop, instead of separate calls to `classifier.train` '
        'and `classifier.evaluate`, which is the default behavior.'))
flags.DEFINE_bool(
name='enable_lars', default=False,
help=flags_core.help_wrap(
'Enable LARS optimizer for large batch training.'))
flags.DEFINE_float(
name='label_smoothing', default=0.0,
help=flags_core.help_wrap(
'Label smoothing parameter used in the softmax_cross_entropy'))
flags.DEFINE_float(
name='weight_decay', default=1e-4,
help=flags_core.help_wrap(
        'Weight decay coefficient for l2 regularization.'))
choice_kwargs = dict(
name='resnet_size', short_name='rs', default='50',
help=flags_core.help_wrap('The size of the ResNet model to use.'))
if resnet_size_choices is None:
flags.DEFINE_string(**choice_kwargs)
else:
flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Transformer Translation Model
This is an implementation of the Transformer translation model as described in the [Attention is All You Need](https://arxiv.org/abs/1706.03762) paper. Based on the code provided by the authors: [Transformer code](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py) from [Tensor2Tensor](https://github.com/tensorflow/tensor2tensor). Also, check out the [tutorial](https://www.tensorflow.org/beta/tutorials/text/transformer) on Transformer in TF 2.0.
**For the new Keras-based TF 2 implementation, please follow the [README](https://github.com/tensorflow/models/official/transformer/README.md) to walk through the new Transformer.**
Transformer is a neural network architecture that solves sequence to sequence problems using attention mechanisms. Unlike traditional neural seq2seq models, Transformer does not involve recurrent connections. The attention mechanism learns dependencies between tokens in two sequences. Since attention weights apply to all tokens in the sequences, the Transformer model is able to easily capture long-distance dependencies.
Transformer's overall structure follows the standard encoder-decoder pattern. The encoder uses self-attention to compute a representation of the input sequence. The decoder generates the output sequence one token at a time, taking the encoder output and previously generated tokens as inputs.
The model also applies embeddings to the input and output tokens, and adds a constant positional encoding. The positional encoding adds information about the position of each token, which the attention layers would otherwise ignore.
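For reference, the sinusoidal encoding from the paper can be sketched as follows (a minimal illustration of the formula, not this repo's exact helper; it assumes an even `hidden_size`):

```
import tensorflow.compat.v1 as tf

def position_encoding_sketch(length, hidden_size):
  """PE(pos, 2i) = sin(pos / 10000^(2i/d)); PE(pos, 2i+1) = cos(...)."""
  position = tf.cast(tf.range(length), tf.float32)
  num_timescales = hidden_size // 2
  log_timescale_increment = (
      tf.log(10000.0) / tf.cast(num_timescales - 1, tf.float32))
  inv_timescales = tf.exp(
      tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  # [length, hidden_size]: the first half of the channels are sines, the
  # second half cosines (the tensor2tensor convention).
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
```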
## Contents
* [Contents](#contents)
* [Walkthrough](#walkthrough)
* [Benchmarks](#benchmarks)
* [Training times](#training-times)
* [Evaluation results](#evaluation-results)
* [Detailed instructions](#detailed-instructions)
* [Environment preparation](#environment-preparation)
* [Download and preprocess datasets](#download-and-preprocess-datasets)
* [Model training and evaluation](#model-training-and-evaluation)
* [Translate using the model](#translate-using-the-model)
* [Compute official BLEU score](#compute-official-bleu-score)
* [TPU](#tpu)
* [Export trained model](#export-trained-model)
* [Example translation](#example-translation)
* [Implementation overview](#implementation-overview)
* [Model Definition](#model-definition)
* [Model Estimator](#model-estimator)
* [Other scripts](#other-scripts)
* [Test dataset](#test-dataset)
* [Term definitions](#term-definitions)
## Walkthrough
Below are the commands for running the Transformer model. See the
[Detailed instructions](#detailed-instructions) for more details on running the
model.
```
cd /path/to/models/official/transformer
# Ensure that PYTHONPATH is correctly defined as described in
# https://github.com/tensorflow/models/tree/master/official#requirements
# export PYTHONPATH="$PYTHONPATH:/path/to/models"
# Export variables
PARAM_SET=big
DATA_DIR=$HOME/transformer/data
MODEL_DIR=$HOME/transformer/model_$PARAM_SET
VOCAB_FILE=$DATA_DIR/vocab.ende.32768
# Download training/evaluation/test datasets
python data_download.py --data_dir=$DATA_DIR
# Train the model for 10 epochs, and evaluate after every epoch.
python transformer_main.py --data_dir=$DATA_DIR --model_dir=$MODEL_DIR \
--vocab_file=$VOCAB_FILE --param_set=$PARAM_SET \
--bleu_source=$DATA_DIR/newstest2014.en --bleu_ref=$DATA_DIR/newstest2014.de
# Run during training in a separate process to get continuous updates,
# or after training is complete.
tensorboard --logdir=$MODEL_DIR
# Translate some text using the trained model
python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
--param_set=$PARAM_SET --text="hello world"
# Compute model's BLEU score using the newstest2014 dataset.
python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
--param_set=$PARAM_SET --file=$DATA_DIR/newstest2014.en --file_out=translation.en
python compute_bleu.py --translation=translation.en --reference=$DATA_DIR/newstest2014.de
```
## Benchmarks
### Training times
Currently, both big and base parameter sets run on a single GPU. The measurements below
are reported from running the model on a P100 GPU.
Param Set | batches/sec | batches per epoch | time per epoch
--- | --- | --- | ---
base | 4.8 | 83244 | 4 hr
big | 1.1 | 41365 | 10 hr
### Evaluation results
Below are the case-insensitive BLEU scores after 10 epochs.
Param Set | Score
--- | ---
base | 27.7
big | 28.9
## Detailed instructions
0. ### Environment preparation
#### Add models repo to PYTHONPATH
Follow the instructions described in the [Requirements](https://github.com/tensorflow/models/tree/master/official#requirements) section to add the models folder to the python path.
#### Export variables (optional)
Export the following variables, or modify the values in each of the snippets below:
```
PARAM_SET=big
DATA_DIR=$HOME/transformer/data
MODEL_DIR=$HOME/transformer/model_$PARAM_SET
VOCAB_FILE=$DATA_DIR/vocab.ende.32768
```
1. ### Download and preprocess datasets
[data_download.py](data_download.py) downloads and preprocesses the training and evaluation WMT datasets. After the data is downloaded and extracted, the training data is used to generate a vocabulary of subtokens. The evaluation and training strings are tokenized, and the resulting data is sharded, shuffled, and saved as TFRecords.
1.75GB of compressed data will be downloaded. In total, the raw files (compressed, extracted, and combined files) take up 8.4GB of disk space. The resulting TFRecord and vocabulary files are 722MB. The script takes around 40 minutes to run, with the bulk of the time spent downloading and ~15 minutes spent on preprocessing.
Command to run:
```
python data_download.py --data_dir=$DATA_DIR
```
Arguments:
* `--data_dir`: Path where the preprocessed TFRecord data, and vocab file will be saved.
* Use the `--help` or `-h` flag to get a full list of possible arguments.
2. ### Model training and evaluation
[transformer_main.py](transformer_main.py) creates a Transformer model, and trains it using Tensorflow Estimator.
Command to run:
```
python transformer_main.py --data_dir=$DATA_DIR --model_dir=$MODEL_DIR \
--vocab_file=$VOCAB_FILE --param_set=$PARAM_SET
```
Arguments:
* `--data_dir`: This should be set to the same directory given to the `data_download`'s `data_dir` argument.
* `--model_dir`: Directory to save Transformer model training checkpoints.
* `--vocab_file`: Path to subtoken vocabulary file. If data_download was used, you may find the file in `data_dir`.
* `--param_set`: Parameter set to use when creating and training the model. Options are `base` and `big` (default).
* Use the `--help` or `-h` flag to get a full list of possible arguments.
#### Customizing training schedule
By default, the model will train for 10 epochs, and evaluate after every epoch. The training schedule may be defined through the flags:
* Training with epochs (default):
* `--train_epochs`: The total number of complete passes to make through the dataset
* `--epochs_between_evals`: The number of epochs to train between evaluations.
* Training with steps:
* `--train_steps`: sets the total number of training steps to run.
* `--steps_between_evals`: Number of training steps to run between evaluations.
Only one of `train_epochs` or `train_steps` may be set. Since the default option is to evaluate the model after training for an epoch, it may take 4 or more hours between model evaluations. To get more frequent evaluations, use the flags `--train_steps=250000 --steps_between_evals=1000`.
Note: At the beginning of each training session, the training dataset is reloaded and shuffled. Stopping the training before completing an epoch may result in worse model quality, since some examples may then be seen more often than others. Therefore, it is recommended to use epochs when model quality is important.
#### Compute BLEU score during model evaluation
Use these flags to compute the BLEU when the model evaluates:
* `--bleu_source`: Path to file containing text to translate.
* `--bleu_ref`: Path to file containing the reference translation.
* `--stop_threshold`: Train until the BLEU score reaches this lower bound. This setting overrides the `--train_steps` and `--train_epochs` flags.
When running `transformer_main.py`, use the flags: `--bleu_source=$DATA_DIR/newstest2014.en --bleu_ref=$DATA_DIR/newstest2014.de`
#### Tensorboard
Training and evaluation metrics (loss, accuracy, approximate BLEU score, etc.) are logged, and can be displayed in the browser using Tensorboard.
```
tensorboard --logdir=$MODEL_DIR
```
The values are displayed in the browser at [localhost:6006](http://localhost:6006).
3. ### Translate using the model
[translate.py](translate.py) contains the script to use the trained model to translate input text or file. Each line in the file is translated separately.
Command to run:
```
python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
--param_set=$PARAM_SET --text="hello world"
```
Arguments for initializing the Subtokenizer and trained model:
* `--model_dir` and `--param_set`: These parameters are used to rebuild the trained model
* `--vocab_file`: Path to subtoken vocabulary file. If data_download was used, you may find the file in `data_dir`.
Arguments for specifying what to translate:
* `--text`: Text to translate
* `--file`: Path to file containing text to translate
* `--file_out`: If `--file` is set, then this file will store the input file's translations.
To translate the newstest2014 data, run:
```
python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
--param_set=$PARAM_SET --file=$DATA_DIR/newstest2014.en --file_out=translation.en
```
Translating the file takes around 15 minutes on a GTX1080, or 5 minutes on a P100.
4. ### Compute official BLEU score
Use [compute_bleu.py](compute_bleu.py) to compute the BLEU by comparing generated translations to the reference translation.
Command to run:
```
python compute_bleu.py --translation=translation.en --reference=$DATA_DIR/newstest2014.de
```
Arguments:
* `--translation`: Path to file containing generated translations.
* `--reference`: Path to file containing reference translations.
* Use the `--help` or `-h` flag to get a full list of possible arguments.
5. ### TPU
TPU support for this version of Transformer is experimental. Currently it is present for
demonstration purposes only, but will be optimized in the coming weeks.
## Export trained model
To export the model in the Tensorflow [SavedModel](https://www.tensorflow.org/guide/saved_model) format, use the argument `--export_dir` when running `transformer_main.py`. A folder named with a timestamp will be created inside that directory (e.g. `$EXPORT_DIR/1526427396`).
```
EXPORT_DIR=$HOME/transformer/saved_model
python transformer_main.py --data_dir=$DATA_DIR --model_dir=$MODEL_DIR \
    --vocab_file=$VOCAB_FILE --param_set=$PARAM_SET --export_dir=$EXPORT_DIR
```
To inspect the SavedModel, use saved_model_cli:
```
SAVED_MODEL_DIR=$EXPORT_DIR/{TIMESTAMP} # replace {TIMESTAMP} with the name of the folder created
saved_model_cli show --dir=$SAVED_MODEL_DIR --all
```
### Example translation
Let's translate **"hello world!"**, **"goodbye world."**, and **"Would you like some pie?"**.
The SignatureDef for "translate" is:

```
signature_def['translate']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input'] tensor_info:
        dtype: DT_INT64
        shape: (-1, -1)
        name: Placeholder:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['outputs'] tensor_info:
        dtype: DT_INT32
        shape: (-1, -1)
        name: model/Transformer/strided_slice_19:0
    outputs['scores'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1)
        name: model/Transformer/strided_slice_20:0
```
Follow the steps below to use the translate signature def:
1. #### Encode the inputs to integer arrays.
This can be done using `utils.tokenizer.Subtokenizer`, and the vocab file in the SavedModel assets (`$SAVED_MODEL_DIR/assets.extra/vocab.txt`).
```
from official.transformer.utils.tokenizer import Subtokenizer
s = Subtokenizer(PATH_TO_VOCAB_FILE)
print(s.encode("hello world!", add_eos=True))
```
The encoded inputs are:
* `"hello world!" = [6170, 3731, 178, 207, 1]`
* `"goodbye world." = [15431, 13966, 36, 178, 3, 1]`
* `"Would you like some pie?" = [9092, 72, 155, 202, 19851, 102, 1]`
2. #### Run `saved_model_cli` to obtain the predicted translations
The encoded inputs should be padded so that they are the same length. The padding token is `0`.
```
ENCODED_INPUTS="[[26228, 145, 178, 1, 0, 0, 0], \
[15431, 13966, 36, 178, 3, 1, 0], \
[9092, 72, 155, 202, 19851, 102, 1]]"
```
Now, use the `run` command with `saved_model_cli` to get the outputs.
```
saved_model_cli run --dir=$SAVED_MODEL_DIR --tag_set=serve --signature_def=translate \
--input_expr="input=$ENCODED_INPUTS"
```
The outputs will look similar to:
```
Result for output key outputs:
[[18744 145 297 1 0 0 0 0 0 0 0 0
0 0]
[ 5450 4642 21 11 297 3 1 0 0 0 0 0
0 0]
[25940 22 66 103 21713 31 102 1 0 0 0 0
0 0]]
Result for output key scores:
[-1.5493642 -1.4032784 -3.252089 ]
```
3. #### Decode the outputs to strings.
Use the `Subtokenizer` and vocab file as described in step 1 to decode the output integer arrays.
```
from official.transformer.utils.tokenizer import Subtokenizer
s = Subtokenizer(PATH_TO_VOCAB_FILE)
print(s.decode([18744, 145, 297, 1]))
```
The decoded outputs from above are:
* `[18744, 145, 297, 1] = "Hallo Welt<EOS>"`
* `[5450, 4642, 21, 11, 297, 3, 1] = "Abschied von der Welt.<EOS>"`
* `[25940, 22, 66, 103, 21713, 31, 102, 1] = "Möchten Sie einen Kuchen?<EOS>"`
## Implementation overview
A brief look at each component in the code:
### Model Definition
The [model](model) subdirectory contains the implementation of the Transformer model. The following files define the Transformer model and its layers:
* [transformer.py](model/transformer.py): Defines the transformer model and its encoder/decoder layer stacks.
* [embedding_layer.py](model/embedding_layer.py): Contains the layer that calculates the embeddings. The embedding weights are also used to calculate the pre-softmax probabilities from the decoder output.
* [attention_layer.py](model/attention_layer.py): Defines the multi-headed and self attention layers that are used in the encoder/decoder stacks.
* [ffn_layer.py](model/ffn_layer.py): Defines the feedforward network that is used in the encoder/decoder stacks. The network is composed of 2 fully connected layers.
Other files:
* [beam_search.py](model/beam_search.py) contains the beam search implementation, which is used during model inference to find high scoring translations.
* [model_params.py](model/model_params.py) contains the parameters used for the big and base models.
* [model_utils.py](model/model_utils.py) defines some helper functions used in the model (calculating padding, bias, etc.).
### Model Estimator
[transformer_main.py](transformer_main.py) creates an `Estimator` to train and evaluate the model.
Helper functions:
* [utils/dataset.py](utils/dataset.py): contains functions for creating a `dataset` that is passed to the `Estimator`.
* [utils/metrics.py](utils/metrics.py): defines the metric functions used by the `Estimator` to evaluate the model.
### Other scripts
Aside from the main file to train the Transformer model, we provide other scripts for using the model or downloading the data:
#### Data download and preprocessing
[data_download.py](data_download.py) downloads and extracts data, then uses `Subtokenizer` to tokenize strings into arrays of int IDs. The int arrays are converted to `tf.Examples` and saved in the TFRecord format.
The data is downloaded from the Workshop of Machine Translation (WMT) [news translation task](http://www.statmt.org/wmt17/translation-task.html). The following datasets are used:
* Europarl v7
* Common Crawl corpus
* News Commentary v12
See the [download section](http://www.statmt.org/wmt17/translation-task.html#download) to explore the raw datasets. The parameters in this model are tuned to fit the English-German translation data, so the EN-DE texts are extracted from the downloaded compressed files.
The text is transformed into arrays of integer IDs using the `Subtokenizer` defined in [`utils/tokenizer.py`](utils/tokenizer.py). During initialization of the `Subtokenizer`, the raw training data is used to generate a vocabulary list containing common subtokens.
The target vocabulary size of the WMT dataset is 32,768. The set of subtokens is found through binary search on the minimum number of times a subtoken appears in the data. The actual vocabulary size is 33,708, and is stored in a 324kB file.
#### Translation
Translation is defined in [translate.py](translate.py). First, `Subtokenizer` tokenizes the input. The vocabulary file is the same used to tokenize the training/eval files. Next, beam search is used to find the combination of tokens that maximizes the probability outputted by the model decoder. The tokens are then converted back to strings with `Subtokenizer`.
#### BLEU computation
[compute_bleu.py](compute_bleu.py): Implementation from [https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py).
### Test dataset
The [newstest2014 files](https://storage.googleapis.com/tf-perf-public/official_transformer/test_data/newstest2014.tgz)
are extracted from the [NMT Seq2Seq tutorial](https://google.github.io/seq2seq/nmt/#download-data).
The raw text files are converted from the SGM format of the
[WMT 2016](http://www.statmt.org/wmt16/translation-task.html) test sets. The
newstest2014 files are put into the `$DATA_DIR` when executing
`data_download.py`
## Term definitions
**Steps / Epochs**:
* Step: unit for processing a single batch of data
* Epoch: a complete run through the dataset
Example: Consider a training dataset with 100 examples that is divided into 20 batches of 5 examples each. A single training step trains the model on one batch. After 20 training steps, the model will have trained on every batch in the dataset, or one epoch.
**Subtoken**: Words are referred to as tokens, and parts of words are referred to as 'subtokens'. For example, the word 'inclined' may be split into `['incline', 'd_']`. The '\_' indicates the end of the token. The subtoken vocabulary list is guaranteed to contain the alphabet (including numbers and special characters), so all words can be tokenized.
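As a quick illustration of subtokenization (a sketch reusing the `Subtokenizer` shown earlier; the exact split depends on the trained vocabulary):

```
from official.transformer.utils.tokenizer import Subtokenizer
s = Subtokenizer(PATH_TO_VOCAB_FILE)
ids = s.encode("inclined")  # e.g. the IDs for ['incline', 'd_']
print(s.decode(ids))        # -> "inclined"
```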
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of multiheaded attention and self-attention layers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
class Attention(tf.layers.Layer):
"""Multi-headed attention layer."""
def __init__(self, hidden_size, num_heads, attention_dropout, train):
if hidden_size % num_heads != 0:
raise ValueError("Hidden size must be evenly divisible by the number of "
"heads.")
super(Attention, self).__init__()
self.hidden_size = hidden_size
self.num_heads = num_heads
self.attention_dropout = attention_dropout
self.train = train
# Layers for linearly projecting the queries, keys, and values.
self.q_dense_layer = tf.layers.Dense(hidden_size, use_bias=False, name="q")
self.k_dense_layer = tf.layers.Dense(hidden_size, use_bias=False, name="k")
self.v_dense_layer = tf.layers.Dense(hidden_size, use_bias=False, name="v")
self.output_dense_layer = tf.layers.Dense(hidden_size, use_bias=False,
name="output_transform")
def split_heads(self, x):
"""Split x into different heads, and transpose the resulting value.
The tensor is transposed to ensure the inner dimensions hold the correct
values during the matrix multiplication.
Args:
x: A tensor with shape [batch_size, length, hidden_size]
Returns:
A tensor with shape [batch_size, num_heads, length, hidden_size/num_heads]
"""
with tf.name_scope("split_heads"):
batch_size = tf.shape(x)[0]
length = tf.shape(x)[1]
# Calculate depth of last dimension after it has been split.
depth = (self.hidden_size // self.num_heads)
# Split the last dimension
x = tf.reshape(x, [batch_size, length, self.num_heads, depth])
# Transpose the result
return tf.transpose(x, [0, 2, 1, 3])
def combine_heads(self, x):
"""Combine tensor that has been split.
Args:
x: A tensor [batch_size, num_heads, length, hidden_size/num_heads]
Returns:
A tensor with shape [batch_size, length, hidden_size]
"""
with tf.name_scope("combine_heads"):
batch_size = tf.shape(x)[0]
length = tf.shape(x)[2]
x = tf.transpose(x, [0, 2, 1, 3]) # --> [batch, length, num_heads, depth]
return tf.reshape(x, [batch_size, length, self.hidden_size])
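  # Shape sketch (illustrative): with batch_size=2, length=5, hidden_size=8,
  # and num_heads=2, split_heads maps [2, 5, 8] -> [2, 2, 5, 4], and
  # combine_heads maps that back to [2, 5, 8].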
def call(self, x, y, bias, cache=None):
"""Apply attention mechanism to x and y.
Args:
x: a tensor with shape [batch_size, length_x, hidden_size]
y: a tensor with shape [batch_size, length_y, hidden_size]
bias: attention bias that will be added to the result of the dot product.
cache: (Used during prediction) dictionary with tensors containing results
of previous attentions. The dictionary must have the items:
{"k": tensor with shape [batch_size, i, key_channels],
"v": tensor with shape [batch_size, i, value_channels]}
where i is the current decoded length.
Returns:
Attention layer output with shape [batch_size, length_x, hidden_size]
"""
# Linearly project the query (q), key (k) and value (v) using different
    # learned projections. This is in preparation for splitting them into
# multiple heads. Multi-head attention uses multiple queries, keys, and
# values rather than regular attention (which uses a single q, k, v).
q = self.q_dense_layer(x)
k = self.k_dense_layer(y)
v = self.v_dense_layer(y)
if cache is not None:
# Combine cached keys and values with new keys and values.
k = tf.concat([cache["k"], k], axis=1)
v = tf.concat([cache["v"], v], axis=1)
# Update cache
cache["k"] = k
cache["v"] = v
# Split q, k, v into heads.
q = self.split_heads(q)
k = self.split_heads(k)
v = self.split_heads(v)
# Scale q to prevent the dot product between q and k from growing too large.
depth = (self.hidden_size // self.num_heads)
q *= depth ** -0.5
# Calculate dot product attention
logits = tf.matmul(q, k, transpose_b=True)
logits += bias
weights = tf.nn.softmax(logits, name="attention_weights")
if self.train:
weights = tf.nn.dropout(weights, 1.0 - self.attention_dropout)
attention_output = tf.matmul(weights, v)
# Recombine heads --> [batch_size, length, hidden_size]
attention_output = self.combine_heads(attention_output)
# Run the combined outputs through another linear projection layer.
attention_output = self.output_dense_layer(attention_output)
return attention_output
class SelfAttention(Attention):
"""Multiheaded self-attention layer."""
def call(self, x, bias, cache=None):
return super(SelfAttention, self).call(x, x, bias, cache)
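# Illustrative usage sketch (not part of the original file): a self-attention
# pass over a random batch, with a zero bias (nothing masked).
#
#   layer = SelfAttention(hidden_size=64, num_heads=8,
#                         attention_dropout=0.1, train=False)
#   x = tf.random.uniform([2, 10, 64])
#   out = layer(x, bias=tf.zeros([1, 1, 10, 10]))  # shape [2, 10, 64]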
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Input pipeline for the transformer model to read, filter, and batch examples.
Two things to note in the pipeline:
1. Batching scheme
The examples encoded in the TFRecord files contain data in the format:
{"inputs": [variable length array of integers],
"targets": [variable length array of integers]}
Where integers in the arrays refer to tokens in the English and German vocab
file (named `vocab.ende.32768`).
Prior to batching, elements in the dataset are grouped by length (max between
"inputs" and "targets" length). Each group is then batched such that:
group_batch_size * length <= batch_size.
Another way to view batch_size is the maximum number of tokens in each batch.
Once batched, each element in the dataset will have the shape:
{"inputs": [group_batch_size, padded_input_length],
"targets": [group_batch_size, padded_target_length]}
Lengths are padded to the longest "inputs" or "targets" sequence in the batch
(padded_input_length and padded_target_length can be different).
This batching scheme decreases the fraction of padding tokens per training
batch, thus improving the training speed significantly.
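For example, with batch_size=4096, a group padded to length 64 is batched
64 examples at a time (64 * 64 <= 4096), while a group padded to length 256
is batched 16 at a time.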
2. Shuffling
While training, the dataset is shuffled in two places in the code: first, the
list of training files is shuffled; second, while reading records with
`parallel_interleave`, the `sloppy` argument allows examples to be produced
in non-deterministic order.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import os
import tensorflow.compat.v1 as tf
from official.utils.misc import model_helpers
# Buffer size for reading records from a TFRecord file. Each training file is
# 7.2 MB, so 8 MB allows an entire file to be kept in memory.
_READ_RECORD_BUFFER = 8 * 1000 * 1000
# Example grouping constants. Defines length boundaries for each group.
# These values are the defaults used in Tensor2Tensor.
_MIN_BOUNDARY = 8
_BOUNDARY_SCALE = 1.1
def _load_records(filename):
"""Read file and return a dataset of tf.Examples."""
return tf.data.TFRecordDataset(filename, buffer_size=_READ_RECORD_BUFFER)
def _parse_example(serialized_example):
"""Return inputs and targets Tensors from a serialized tf.Example."""
data_fields = {
"inputs": tf.VarLenFeature(tf.int64),
"targets": tf.VarLenFeature(tf.int64)
}
parsed = tf.parse_single_example(serialized_example, data_fields)
inputs = tf.sparse_tensor_to_dense(parsed["inputs"])
targets = tf.sparse_tensor_to_dense(parsed["targets"])
return inputs, targets
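# Illustration (hypothetical token ids): a serialized tf.Example holding
# inputs=[3, 17, 2] and targets=[9, 4, 11, 2] is parsed into two dense 1-D
# int64 tensors, [3, 17, 2] and [9, 4, 11, 2].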
def _filter_max_length(example, max_length=256):
"""Indicates whether the example's length is lower than the maximum length."""
return tf.logical_and(tf.size(example[0]) <= max_length,
tf.size(example[1]) <= max_length)
def _get_example_length(example):
"""Returns the maximum length between the example inputs and targets."""
length = tf.maximum(tf.shape(example[0])[0], tf.shape(example[1])[0])
return length
def _create_min_max_boundaries(
max_length, min_boundary=_MIN_BOUNDARY, boundary_scale=_BOUNDARY_SCALE):
"""Create min and max boundary lists up to max_length.
For example, when max_length=24, min_boundary=4 and boundary_scale=2, the
returned values will be:
buckets_min = [0, 4, 8, 16]
buckets_max = [4, 8, 16, 25]
Args:
max_length: The maximum length of example in dataset.
min_boundary: Minimum length in boundary.
boundary_scale: Amount to scale consecutive boundaries in the list.
Returns:
min and max boundary lists
"""
# Create bucket boundaries list by scaling the previous boundary or adding 1
# (to ensure increasing boundary sizes).
bucket_boundaries = []
x = min_boundary
while x < max_length:
bucket_boundaries.append(x)
x = max(x + 1, int(x * boundary_scale))
# Create min and max boundary lists from the initial list.
buckets_min = [0] + bucket_boundaries
buckets_max = bucket_boundaries + [max_length + 1]
return buckets_min, buckets_max
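# Tracing the docstring example (max_length=24, min_boundary=4,
# boundary_scale=2): x=4 -> append 4, x=max(5, 8)=8; append 8,
# x=max(9, 16)=16; append 16, x=max(17, 32)=32 >= 24, so the loop stops.
# bucket_boundaries == [4, 8, 16], giving buckets_min == [0, 4, 8, 16] and
# buckets_max == [4, 8, 16, 25].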
def _batch_examples(dataset, batch_size, max_length):
"""Group examples by similar lengths, and return batched dataset.
Each batch of similar-length examples is padded to the same length, and
different batches may contain different numbers of elements, such that:
group_batch_size * padded_length <= batch_size.
This decreases the number of padding tokens per batch, which improves the
training speed.
Args:
dataset: Dataset of unbatched examples.
batch_size: Max number of tokens per batch of examples.
max_length: Max number of tokens in an example input or target sequence.
Returns:
Dataset of batched examples with similar lengths.
"""
# Get the min and max boundary lists. These are used to calculate each
# example's `bucket_id`, which is the index at which:
#   buckets_min[bucket_id] <= len(example) < buckets_max[bucket_id]
# Using both lists lets example_to_bucket_id find the bucket with a single
# vectorized comparison rather than a search.
buckets_min, buckets_max = _create_min_max_boundaries(max_length)
# Create list of batch sizes for each bucket_id, so that
# bucket_batch_size[bucket_id] * buckets_max[bucket_id] <= batch_size
bucket_batch_sizes = [batch_size // x for x in buckets_max]
# bucket_id will be a tensor, so convert this list to a tensor as well.
bucket_batch_sizes = tf.constant(bucket_batch_sizes, dtype=tf.int64)
def example_to_bucket_id(example_input, example_target):
"""Return int64 bucket id for this example, calculated based on length."""
seq_length = _get_example_length((example_input, example_target))
# TODO: investigate whether removing code branching improves performance.
conditions_c = tf.logical_and(
tf.less_equal(buckets_min, seq_length),
tf.less(seq_length, buckets_max))
bucket_id = tf.reduce_min(tf.where(conditions_c))
return bucket_id
def window_size_fn(bucket_id):
"""Return number of examples to be grouped when given a bucket id."""
return bucket_batch_sizes[bucket_id]
def batching_fn(bucket_id, grouped_dataset):
"""Batch and add padding to a dataset of elements with similar lengths."""
bucket_batch_size = window_size_fn(bucket_id)
# Batch the dataset and add padding so that all input sequences in the
# examples have the same length, and all target sequences have the same
# lengths as well. Resulting lengths of inputs and targets can differ.
return grouped_dataset.padded_batch(bucket_batch_size, ([None], [None]))
return dataset.apply(tf.data.experimental.group_by_window(
key_func=example_to_bucket_id,
reduce_func=batching_fn,
window_size=None,
window_size_func=window_size_fn))
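# Worked example of the bucket arithmetic: with batch_size=1024 and the
# default boundaries, buckets_max begins [8, 9, 10, 11, ...], so
# bucket_batch_sizes begins [128, 113, 102, 93, ...]; e.g. examples shorter
# than 8 tokens are grouped 128 at a time, keeping each batch at or below
# 1024 tokens after padding.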
def _read_and_batch_from_files(
file_pattern, batch_size, max_length, num_parallel_calls, shuffle, repeat,
static_batch=False):
"""Create dataset where each item is a dict of "inputs" and "targets".
Args:
file_pattern: String used to match the input TFRecord files.
batch_size: Maximum number of tokens per batch of examples.
max_length: Maximum number of tokens per example.
num_parallel_calls: Number of CPU cores for parallel input processing.
shuffle: If true, randomizes order of elements.
repeat: Number of times to repeat the dataset. If None, the dataset is
repeated forever.
static_batch: Whether the batches in the dataset should have static shapes.
If True, the input is batched so that every batch has the
shape [batch_size // max_length, max_length]. If False, the input is
grouped by length, and batched so that batches may have different
shapes [N, M], where:
N * M <= batch_size
M <= max_length
In general, this setting should be False. Dynamic shapes allow the inputs
to be grouped so that the number of padding tokens is minimized, and helps
model training. In cases where the input shape must be static
(e.g. running on TPU), this setting should be set to True.
Returns:
tf.data.Dataset object containing examples loaded from the files.
"""
dataset = tf.data.Dataset.list_files(file_pattern, shuffle=shuffle)
# Read files and interleave results. When training, the order of the examples
# will be non-deterministic.
dataset = dataset.apply(
tf.data.experimental.parallel_interleave(
_load_records, sloppy=shuffle, cycle_length=num_parallel_calls))
# Parse each tf.Example into a dictionary
# TODO: Look into prefetch_input_elements for performance optimization.
dataset = dataset.map(_parse_example,
num_parallel_calls=num_parallel_calls)
# Remove examples where the input or target length exceeds the maximum length.
dataset = dataset.filter(lambda x, y: _filter_max_length((x, y), max_length))
if static_batch:
dataset = dataset.padded_batch(
batch_size // max_length, ([max_length], [max_length]),
drop_remainder=True)
else:
# Group and batch such that each batch has examples of similar length.
dataset = _batch_examples(dataset, batch_size, max_length)
dataset = dataset.repeat(repeat)
# Prefetch the next element to improve speed of input pipeline.
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
return dataset
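# Usage sketch (hypothetical paths and sizes):
#   dataset = _read_and_batch_from_files(
#       "/tmp/translate_ende/*train*", batch_size=4096, max_length=256,
#       num_parallel_calls=4, shuffle=True, repeat=None)
#   # Each element is an (inputs, targets) pair of int64 tensors with shape
#   # [group_batch_size, padded_length].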
def _generate_synthetic_data(params):
"""Create synthetic data based on the parameter batch size."""
batch = length = int(math.sqrt(params["batch_size"]))
return model_helpers.generate_synthetic_data(
input_shape=tf.TensorShape([batch, length]),
input_value=1,
input_dtype=tf.int32,
label_shape=tf.TensorShape([batch, length]),
label_value=1,
label_dtype=tf.int32,
)
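# E.g. params["batch_size"] == 4096 gives batch == length == 64, so the
# synthetic inputs and labels are constant int32 tensors of shape [64, 64].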
def train_input_fn(params):
"""Load and return dataset of batched examples for use during training."""
file_pattern = os.path.join(params["data_dir"] or "", "*train*")
if params["use_synthetic_data"]:
return _generate_synthetic_data(params)
return _read_and_batch_from_files(
file_pattern, params["batch_size"], params["max_length"],
params["num_parallel_calls"], shuffle=True,
repeat=params["repeat_dataset"], static_batch=params["static_batch"])
def eval_input_fn(params):
"""Load and return dataset of batched examples for use during evaluation."""
file_pattern = os.path.join(params["data_dir"] or "", "*dev*")
if params["use_synthetic_data"]:
return _generate_synthetic_data(params)
return _read_and_batch_from_files(
file_pattern, params["batch_size"], params["max_length"],
params["num_parallel_calls"], shuffle=False, repeat=1,
static_batch=params["static_batch"])
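# Sketch of the params dict these input functions expect; keys are taken from
# the code above, values are illustrative only:
#   params = {
#       "data_dir": "/tmp/translate_ende",
#       "batch_size": 4096,
#       "max_length": 256,
#       "num_parallel_calls": 4,
#       "repeat_dataset": None,
#       "static_batch": False,
#       "use_synthetic_data": False,
#   }
#   dataset = train_input_fn(params)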
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of embedding layer with shared weights."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf # pylint: disable=g-bad-import-order
from official.r1.utils import tpu as tpu_utils
class EmbeddingSharedWeights(tf.layers.Layer):
"""Calculates input embeddings and pre-softmax linear with shared weights."""
def __init__(self, vocab_size, hidden_size, method="gather"):
"""Specify characteristic parameters of embedding layer.
Args:
vocab_size: Number of tokens in the embedding. (Typically ~32,000)
hidden_size: Dimensionality of the embedding. (Typically 512 or 1024)
method: Strategy for performing embedding lookup. "gather" uses tf.gather,
which performs well on CPUs and GPUs but very poorly on TPUs. "matmul"
one-hot encodes the indices and formulates the embedding as a sparse
matrix multiplication. The matmul formulation is wasteful since it does
extra work; however, matrix multiplication is very fast on TPUs, which
makes "matmul" considerably faster than "gather" there.
"""
super(EmbeddingSharedWeights, self).__init__()
self.vocab_size = vocab_size
self.hidden_size = hidden_size
if method not in ("gather", "matmul"):
raise ValueError("method {} must be 'gather' or 'matmul'".format(method))
self.method = method
def build(self, _):
with tf.variable_scope("embedding_and_softmax", reuse=tf.AUTO_REUSE):
# Create and initialize weights. The random normal initializer was chosen
# arbitrarily, and works well in practice.
self.shared_weights = tf.get_variable(
"weights", [self.vocab_size, self.hidden_size],
initializer=tf.random_normal_initializer(
0., self.hidden_size ** -0.5))
self.built = True
def call(self, x):
"""Get token embeddings of x.
Args:
x: An int64 tensor with shape [batch_size, length]
Returns:
embeddings: float32 tensor with shape [batch_size, length, hidden_size].
Embeddings at padding positions (where x == 0) are zeroed out.
"""
with tf.name_scope("embedding"):
# Create binary mask of size [batch_size, length]
mask = tf.to_float(tf.not_equal(x, 0))
if self.method == "gather":
embeddings = tf.gather(self.shared_weights, x)
embeddings *= tf.expand_dims(mask, -1)
else: # matmul
embeddings = tpu_utils.embedding_matmul(
embedding_table=self.shared_weights,
values=tf.cast(x, dtype=tf.int32),
mask=mask
)
# embedding_matmul already zeros out masked positions, so
# `embeddings *= tf.expand_dims(mask, -1)` is unnecessary.
# Scale embedding by the sqrt of the hidden size
embeddings *= self.hidden_size ** 0.5
return embeddings
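# Shape sketch: for x = tf.constant([[1, 5, 0]]) (batch_size=1, length=3,
# one padding token), the result has shape [1, 3, hidden_size] and the row
# for the padding position is all zeros, since the mask (or embedding_matmul)
# zeroes it and scaling preserves zeros.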
def linear(self, x):
"""Computes logits by running x through a linear layer.
Args:
x: A float32 tensor with shape [batch_size, length, hidden_size]
Returns:
float32 tensor with shape [batch_size, length, vocab_size].
"""
with tf.name_scope("presoftmax_linear"):
batch_size = tf.shape(x)[0]
length = tf.shape(x)[1]
x = tf.reshape(x, [-1, self.hidden_size])
logits = tf.matmul(x, self.shared_weights, transpose_b=True)
return tf.reshape(logits, [batch_size, length, self.vocab_size])
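# Weight sharing in action (illustrative sizes):
#   layer = EmbeddingSharedWeights(vocab_size=32768, hidden_size=512)
#   x = tf.constant([[1, 5, 0]])     # [batch_size=1, length=3]
#   embedded = layer(x)              # [1, 3, 512], uses shared_weights
#   logits = layer.linear(embedded)  # [1, 3, 32768], same shared_weights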