Unverified commit 1f3247f4 authored by Ayushman Kumar, committed by GitHub

Merge pull request #6 from tensorflow/master

Updated
parents 370a4c8d 0265f59c
@@ -25,6 +25,7 @@ from __future__ import division
from __future__ import print_function
import tensorflow as tf # pylint: disable=g-bad-import-order
from absl import logging
from official.utils.logs import hooks
from official.utils.logs import logger
@@ -57,9 +58,9 @@ def get_train_hooks(name_list, use_tpu=False, **kwargs):
return []
if use_tpu:
-    tf.compat.v1.logging.warning('hooks_helper received name_list `{}`, but a '
-                                 'TPU is specified. No hooks will be used.'
-                                 .format(name_list))
+    logging.warning(
+        'hooks_helper received name_list `%s`, but a '
+        'TPU is specified. No hooks will be used.', name_list)
return []
train_hooks = []
......
@@ -21,12 +21,13 @@ from __future__ import print_function
import time
from absl import logging
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.utils.logs import hooks
from official.utils.testing import mock_lib
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)
+logging.set_verbosity(logging.DEBUG)
class ExamplesPerSecondHookTest(tf.test.TestCase):
......
@@ -35,6 +35,7 @@ from six.moves import _thread as thread
from absl import flags
import tensorflow as tf
from tensorflow.python.client import device_lib
from absl import logging
from official.utils.logs import cloud_lib
@@ -119,9 +120,8 @@ class BaseBenchmarkLogger(object):
eval_results: dict, the result of evaluate.
"""
if not isinstance(eval_results, dict):
-      tf.compat.v1.logging.warning(
-          "eval_results should be dictionary for logging. Got %s",
-          type(eval_results))
+      logging.warning("eval_results should be dictionary for logging. Got %s",
+                      type(eval_results))
return
global_step = eval_results[tf.compat.v1.GraphKeys.GLOBAL_STEP]
for key in sorted(eval_results):
@@ -144,12 +144,12 @@ class BaseBenchmarkLogger(object):
"""
metric = _process_metric_to_json(name, value, unit, global_step, extras)
if metric:
tf.compat.v1.logging.info("Benchmark metric: %s", metric)
logging.info("Benchmark metric: %s", metric)
def log_run_info(self, model_name, dataset_name, run_params, test_id=None):
-    tf.compat.v1.logging.info(
-        "Benchmark run: %s", _gather_run_info(model_name, dataset_name,
-                                              run_params, test_id))
+    logging.info(
+        "Benchmark run: %s",
+        _gather_run_info(model_name, dataset_name, run_params, test_id))
def on_finish(self, status):
pass
@@ -187,7 +187,7 @@ class BenchmarkFileLogger(BaseBenchmarkLogger):
self._metric_file_handler.write("\n")
self._metric_file_handler.flush()
except (TypeError, ValueError) as e:
-      tf.compat.v1.logging.warning(
+      logging.warning(
"Failed to dump metric to log file: name %s, value %s, error %s",
name, value, e)
@@ -212,8 +212,7 @@ class BenchmarkFileLogger(BaseBenchmarkLogger):
json.dump(run_info, f)
f.write("\n")
except (TypeError, ValueError) as e:
-      tf.compat.v1.logging.warning(
-          "Failed to dump benchmark run info to log file: %s", e)
+      logging.warning("Failed to dump benchmark run info to log file: %s", e)
def on_finish(self, status):
self._metric_file_handler.flush()
@@ -322,8 +321,8 @@ def _process_metric_to_json(
name, value, unit=None, global_step=None, extras=None):
"""Validate the metric data and generate JSON for insert."""
if not isinstance(value, numbers.Number):
-    tf.compat.v1.logging.warning(
-        "Metric value to log should be a number. Got %s", type(value))
+    logging.warning("Metric value to log should be a number. Got %s",
+                    type(value))
return None
extras = _convert_to_json_dict(extras)
@@ -383,8 +382,7 @@ def _collect_cpu_info(run_info):
run_info["machine_config"]["cpu_info"] = cpu_info
except ImportError:
-    tf.compat.v1.logging.warn(
-        "'cpuinfo' not imported. CPU info will not be logged.")
+    logging.warn("'cpuinfo' not imported. CPU info will not be logged.")
def _collect_memory_info(run_info):
@@ -396,8 +394,7 @@ def _collect_memory_info(run_info):
run_info["machine_config"]["memory_total"] = vmem.total
run_info["machine_config"]["memory_available"] = vmem.available
except ImportError:
-    tf.compat.v1.logging.warn(
-        "'psutil' not imported. Memory info will not be logged.")
+    logging.warn("'psutil' not imported. Memory info will not be logged.")
def _collect_test_environment(run_info):
......
@@ -28,6 +28,7 @@ import unittest
import mock
from absl.testing import flagsaver
import tensorflow as tf # pylint: disable=g-bad-import-order
from absl import logging
try:
from google.cloud import bigquery
@@ -79,7 +80,7 @@ class BenchmarkLoggerTest(tf.test.TestCase):
mock_logger = mock.MagicMock()
mock_config_benchmark_logger.return_value = mock_logger
with logger.benchmark_context(None):
tf.compat.v1.logging.info("start benchmarking")
logging.info("start benchmarking")
mock_logger.on_finish.assert_called_once_with(logger.RUN_STATUS_SUCCESS)
@mock.patch("official.utils.logs.logger.config_benchmark_logger")
@@ -96,18 +97,18 @@ class BaseBenchmarkLoggerTest(tf.test.TestCase):
def setUp(self):
super(BaseBenchmarkLoggerTest, self).setUp()
-    self._actual_log = tf.compat.v1.logging.info
+    self._actual_log = logging.info
self.logged_message = None
def mock_log(*args, **kwargs):
self.logged_message = args
self._actual_log(*args, **kwargs)
-    tf.compat.v1.logging.info = mock_log
+    logging.info = mock_log
def tearDown(self):
super(BaseBenchmarkLoggerTest, self).tearDown()
-    tf.compat.v1.logging.info = self._actual_log
+    logging.info = self._actual_log
def test_log_metric(self):
log = logger.BaseBenchmarkLogger()
......
@@ -31,8 +31,9 @@ import re
import subprocess
import sys
import typing
from absl import logging
# pylint:disable=logging-format-interpolation
import tensorflow as tf
_MIN_VERSION = (0, 0, 10)
_STACK_OFFSET = 2
@@ -94,10 +95,9 @@ def get_mlperf_log():
version = pkg_resources.get_distribution("mlperf_compliance")
version = tuple(int(i) for i in version.version.split("."))
if version < _MIN_VERSION:
-    tf.compat.v1.logging.warning(
-        "mlperf_compliance is version {}, must be >= {}".format(
-            ".".join([str(i) for i in version]),
-            ".".join([str(i) for i in _MIN_VERSION])))
+    logging.warning("mlperf_compliance is version {}, must be >= {}".format(
+        ".".join([str(i) for i in version]),
+        ".".join([str(i) for i in _MIN_VERSION])))
raise ImportError
return mlperf_compliance.mlperf_log
@@ -187,6 +187,6 @@ def clear_system_caches():
if __name__ == "__main__":
-  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
+  logging.set_verbosity(logging.INFO)
with LOGGER(True):
ncf_print(key=TAGS.RUN_START)
@@ -22,6 +22,8 @@ import json
import os
import random
import string
from absl import logging
import tensorflow.compat.v2 as tf
from official.utils.misc import tpu_lib
@@ -252,7 +254,7 @@ class SyntheticIterator(object):
def _monkey_patch_dataset_method(strategy):
"""Monkey-patch `strategy`'s `make_dataset_iterator` method."""
def make_dataset(self, dataset):
-    tf.compat.v1.logging.info('Using pure synthetic data.')
+    logging.info('Using pure synthetic data.')
with self.scope():
if self.extended._global_batch_size: # pylint: disable=protected-access
return SyntheticDataset(dataset, self.num_replicas_in_sync)
......
@@ -20,8 +20,11 @@ from __future__ import print_function
import numbers
from absl import logging
import tensorflow as tf
from tensorflow.python.util import nest
# pylint:disable=logging-format-interpolation
def past_stop_threshold(stop_threshold, eval_metric):
@@ -48,9 +51,8 @@ def past_stop_threshold(stop_threshold, eval_metric):
"must be a number.")
if eval_metric >= stop_threshold:
-    tf.compat.v1.logging.info(
-        "Stop threshold of {} was passed with metric value {}.".format(
-            stop_threshold, eval_metric))
+    logging.info("Stop threshold of {} was passed with metric value {}.".format(
+        stop_threshold, eval_metric))
return True
return False
@@ -88,6 +90,6 @@ def generate_synthetic_data(
def apply_clean(flags_obj):
if flags_obj.clean and tf.io.gfile.exists(flags_obj.model_dir):
tf.compat.v1.logging.info("--clean flag set. Removing existing model dir:"
" {}".format(flags_obj.model_dir))
logging.info("--clean flag set. Removing existing model dir:"
" {}".format(flags_obj.model_dir))
tf.io.gfile.rmtree(flags_obj.model_dir)
@@ -20,8 +20,9 @@ from __future__ import print_function
import os
from absl import flags
from absl import logging
from absl.testing import flagsaver
-import tensorflow as tf  # pylint: disable=g-bad-import-order
+import tensorflow as tf
FLAGS = flags.FLAGS
@@ -75,7 +76,7 @@ class PerfZeroBenchmark(tf.test.Benchmark):
def _setup(self):
"""Sets up and resets flags before each test."""
-    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
+    logging.set_verbosity(logging.INFO)
if PerfZeroBenchmark.local_flags is None:
for flag_method in self.flag_methods:
flag_method()
......
@@ -87,10 +87,6 @@ BASE_CFG = {
},
'resnet': {
'resnet_depth': 50,
'dropblock': {
'dropblock_keep_prob': None,
'dropblock_size': None,
},
'batch_norm': {
'batch_norm_momentum': 0.997,
'batch_norm_epsilon': 1e-4,
@@ -111,43 +107,6 @@ BASE_CFG = {
'use_sync_bn': False,
},
},
'nasfpn': {
'min_level': 3,
'max_level': 7,
'fpn_feat_dims': 256,
'num_repeats': 5,
'use_separable_conv': False,
'dropblock': {
'dropblock_keep_prob': None,
'dropblock_size': None,
},
'batch_norm': {
'batch_norm_momentum': 0.997,
'batch_norm_epsilon': 1e-4,
'batch_norm_trainable': True,
'use_sync_bn': False,
},
},
# tunable_nasfpn:strip_begin
'tunable_nasfpn_v1': {
'min_level': 3,
'max_level': 7,
'fpn_feat_dims': 256,
'num_repeats': 5,
'use_separable_conv': False,
'dropblock': {
'dropblock_keep_prob': None,
'dropblock_size': None,
},
'batch_norm': {
'batch_norm_momentum': 0.997,
'batch_norm_epsilon': 1e-4,
'batch_norm_trainable': True,
'use_sync_bn': False,
},
'nodes': None
},
# tunable_nasfpn:strip_end
'postprocess': {
'use_batched_nms': False,
'max_total_size': 100,
......
@@ -106,10 +106,6 @@ RETINANET_CFG = {
},
'resnet': {
'resnet_depth': 50,
'dropblock': {
'dropblock_keep_prob': None,
'dropblock_size': None,
},
'batch_norm': {
'batch_norm_momentum': 0.997,
'batch_norm_epsilon': 1e-4,
@@ -128,22 +124,6 @@ RETINANET_CFG = {
'batch_norm_trainable': True,
},
},
'nasfpn': {
'min_level': 3,
'max_level': 7,
'fpn_feat_dims': 256,
'num_repeats': 5,
'use_separable_conv': False,
'dropblock': {
'dropblock_keep_prob': None,
'dropblock_size': None,
},
'batch_norm': {
'batch_norm_momentum': 0.997,
'batch_norm_epsilon': 1e-4,
'batch_norm_trainable': True,
},
},
'retinanet_head': {
'min_level': 3,
'max_level': 7,
......
@@ -52,7 +52,7 @@ flags.DEFINE_string(
flags.DEFINE_string(
'model', default='retinanet',
-    help='Model to run: `retinanet` or `shapemask`.')
+    help='Model to run: `retinanet` or `mask_rcnn`.')
flags.DEFINE_string('training_file_pattern', None,
'Location of the train data.')
......
@@ -37,19 +37,12 @@ def batch_norm_relu_generator(params):
return _batch_norm_op
def dropblock_generator(params):
return nn_ops.Dropblock(
dropblock_keep_prob=params.dropblock_keep_prob,
dropblock_size=params.dropblock_size)
def backbone_generator(params):
"""Generator function for various backbone models."""
if params.architecture.backbone == 'resnet':
resnet_params = params.resnet
backbone_fn = resnet.Resnet(
resnet_depth=resnet_params.resnet_depth,
dropblock=dropblock_generator(resnet_params.dropblock),
batch_norm_relu=batch_norm_relu_generator(resnet_params.batch_norm))
else:
raise ValueError('Backbone model %s is not supported.' %
......
@@ -84,88 +84,3 @@ class BatchNormRelu(tf.keras.layers.Layer):
inputs = tf.nn.relu(inputs)
return inputs
class Dropblock(object):
"""DropBlock: a regularization method for convolutional neural networks.
DropBlock is a form of structured dropout, where units in a contiguous
region of a feature map are dropped together. DropBlock works better than
dropout on convolutional layers due to the fact that activation units in
convolutional layers are spatially correlated.
See https://arxiv.org/pdf/1810.12890.pdf for details.
"""
def __init__(self,
dropblock_keep_prob=None,
dropblock_size=None,
data_format='channels_last'):
self._dropblock_keep_prob = dropblock_keep_prob
self._dropblock_size = dropblock_size
self._data_format = data_format
def __call__(self, net, is_training=False):
"""Builds Dropblock layer.
Args:
net: `Tensor` input tensor.
is_training: `bool` if True, the model is in training mode.
Returns:
A version of input tensor with DropBlock applied.
"""
if not is_training or self._dropblock_keep_prob is None:
return net
logging.info('Applying DropBlock: dropblock_size {}, net.shape {}'.format(
self._dropblock_size, net.shape))
if self._data_format == 'channels_last':
_, height, width, _ = net.get_shape().as_list()
else:
_, _, height, width = net.get_shape().as_list()
total_size = width * height
dropblock_size = min(self._dropblock_size, min(width, height))
# Seed_drop_rate is the gamma parameter of DropBlock.
seed_drop_rate = (
1.0 - self._dropblock_keep_prob) * total_size / dropblock_size**2 / (
(width - self._dropblock_size + 1) *
(height - self._dropblock_size + 1))
# Forces the block to be inside the feature map.
w_i, h_i = tf.meshgrid(tf.range(width), tf.range(height))
valid_block = tf.logical_and(
tf.logical_and(w_i >= int(dropblock_size // 2),
w_i < width - (dropblock_size - 1) // 2),
tf.logical_and(h_i >= int(dropblock_size // 2),
h_i < height - (dropblock_size - 1) // 2))
if self._data_format == 'channels_last':
valid_block = tf.reshape(valid_block, [1, height, width, 1])
else:
valid_block = tf.reshape(valid_block, [1, 1, height, width])
randnoise = tf.random.uniform(net.shape, dtype=tf.float32)
valid_block = tf.cast(valid_block, dtype=tf.float32)
seed_keep_rate = tf.cast(1 - seed_drop_rate, dtype=tf.float32)
block_pattern = (1 - valid_block + seed_keep_rate + randnoise) >= 1
block_pattern = tf.cast(block_pattern, dtype=tf.float32)
if self._data_format == 'channels_last':
ksize = [1, self._dropblock_size, self._dropblock_size, 1]
else:
ksize = [1, 1, self._dropblock_size, self._dropblock_size]
block_pattern = -tf.nn.max_pool2d(
-block_pattern,
ksize=ksize,
strides=[1, 1, 1, 1],
padding='SAME',
data_format='NHWC' if self._data_format == 'channels_last' else 'NCHW')
percent_ones = tf.cast(
tf.reduce_sum(input_tensor=block_pattern), tf.float32) / tf.cast(
tf.size(input=block_pattern), tf.float32)
net = net / tf.cast(percent_ones, net.dtype) * tf.cast(
block_pattern, net.dtype)
return net
@@ -34,14 +34,12 @@ class Resnet(object):
def __init__(self,
resnet_depth,
dropblock=nn_ops.Dropblock(),
batch_norm_relu=nn_ops.BatchNormRelu,
data_format='channels_last'):
"""ResNet initialization function.
Args:
resnet_depth: `int` depth of ResNet backbone model.
dropblock: a dropblock layer.
batch_norm_relu: an operation that includes a batch normalization layer
followed by a relu layer(optional).
data_format: `str` either "channels_first" for `[batch, channels, height,
@@ -49,7 +47,6 @@
"""
self._resnet_depth = resnet_depth
self._dropblock = dropblock
self._batch_norm_relu = batch_norm_relu
self._data_format = data_format
@@ -219,24 +216,20 @@ class Resnet(object):
inputs=inputs, filters=filters_out, kernel_size=1, strides=strides)
shortcut = self._batch_norm_relu(relu=False)(
shortcut, is_training=is_training)
shortcut = self._dropblock(shortcut, is_training=is_training)
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=1, strides=1)
inputs = self._batch_norm_relu()(inputs, is_training=is_training)
inputs = self._dropblock(inputs, is_training=is_training)
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides)
inputs = self._batch_norm_relu()(inputs, is_training=is_training)
inputs = self._dropblock(inputs, is_training=is_training)
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=4 * filters, kernel_size=1, strides=1)
inputs = self._batch_norm_relu(
relu=False, init_zero=True)(
inputs, is_training=is_training)
inputs = self._dropblock(inputs, is_training=is_training)
return tf.nn.relu(inputs + shortcut)
......
# Image Classification
-This folder contains the TF 2.0 model examples for image classification:
+This folder contains TF 2.0 model examples for image classification:
* [ResNet](#resnet)
* [MNIST](#mnist)
* [Classifier Trainer](#classifier-trainer), a framework that uses the Keras
compile/fit methods for image classification models, including:
* ResNet
* EfficientNet[^1]
[^1]: Currently a work in progress. We cannot match "AutoAugment (AA)" in [the original version](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet).
For more information about other types of models, please refer to this
[README file](../../README.md).
## ResNet
Similar to the [estimator implementation](../../r1/resnet), the Keras
implementation has code for the ImageNet dataset. The ImageNet
version uses a ResNet50 model implemented in
[`resnet_model.py`](./resnet/resnet_model.py).
## Before you begin
Please make sure that you have the latest version of TensorFlow
installed and
[add the models folder to your Python path](/official/#running-the-models).
### Pretrained Models
* [ResNet50 Checkpoints](https://storage.googleapis.com/cloud-tpu-checkpoints/resnet/resnet50.tar.gz)
* ResNet50 TFHub: [feature vector](https://tfhub.dev/tensorflow/resnet_50/feature_vector/1)
and [classification](https://tfhub.dev/tensorflow/resnet_50/classification/1)
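As a quick illustration, the TF-Hub module above can be loaded with the
`tensorflow_hub` library (a minimal sketch; the module URL is taken from the
list above, and 224x224 RGB inputs with pixel values in [0, 1] are assumed):
```python
import tensorflow as tf
import tensorflow_hub as hub

# Wrap the TF-Hub ResNet50 classification module listed above in a Keras
# model; 224x224 RGB inputs with pixel values in [0, 1] are assumed.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),
    hub.KerasLayer("https://tfhub.dev/tensorflow/resnet_50/classification/1"),
])
logits = model(tf.random.uniform((1, 224, 224, 3)))  # ImageNet class scores.
```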
-### ImageNet Training
+### ImageNet preparation
Download the ImageNet dataset and convert it to TFRecord format.
The following [script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py)
and [README](https://github.com/tensorflow/tpu/tree/master/tools/datasets#imagenet_to_gcspy)
provide a few options.
Once your dataset is ready, you can begin training the model as follows:
```bash
python resnet/resnet_imagenet_main.py
```
Again, if you did not download the data to the default directory, specify the
location with the `--data_dir` flag:
```bash
python resnet/resnet_imagenet_main.py --data_dir=/path/to/imagenet
```
There are more flag options you can specify. Here are some examples:
- `--use_synthetic_data`: when set to true, synthetic data, rather than real
data, are used;
- `--batch_size`: the batch size used for the model;
- `--model_dir`: the directory to save the model checkpoint;
- `--train_epochs`: number of epochs to run for training the model;
- `--train_steps`: number of steps to run for training the model. We now only
support a number that is smaller than the number of batches in an epoch;
- `--skip_eval`: when set to true, evaluation as well as validation during
training is skipped.
For example, this is a typical command line to run with ImageNet data with
batch size 128 per GPU:
```bash
python resnet/resnet_imagenet_main.py \
--model_dir=/tmp/model_dir/something \
--num_gpus=2 \
--batch_size=128 \
--train_epochs=90 \
--train_steps=10 \
--use_synthetic_data=false
```
See [`common.py`](common.py) for the full list of options.
### Using multiple GPUs
You can train these models on multiple GPUs using `tf.distribute.Strategy` API.
You can read more about them in this
[guide](https://www.tensorflow.org/guide/distribute_strategy).
In this example, we have made it easier to use with just the command line flag
`--num_gpus`. By default, this flag is 1 if TensorFlow is compiled with CUDA,
and 0 otherwise.
- `--num_gpus=0`: Uses tf.distribute.OneDeviceStrategy with CPU as the device.
- `--num_gpus=1`: Uses tf.distribute.OneDeviceStrategy with GPU as the device.
- `--num_gpus=2+`: Uses tf.distribute.MirroredStrategy to run synchronous
distributed training across the GPUs.
If you wish to run without `tf.distribute.Strategy`, you can do so by setting
`--distribution_strategy=off`.
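A minimal sketch of what this flag maps to, assuming TF 2.x (the actual helper
in this repository handles more devices and edge cases):
```python
import tensorflow as tf

def strategy_for_num_gpus(num_gpus):
  """Simplified mapping from a GPU count to a tf.distribute.Strategy."""
  if num_gpus == 0:
    return tf.distribute.OneDeviceStrategy("/device:CPU:0")
  if num_gpus == 1:
    return tf.distribute.OneDeviceStrategy("/device:GPU:0")
  # 2+ GPUs: synchronous data-parallel training.
  return tf.distribute.MirroredStrategy(
      devices=["/device:GPU:%d" % i for i in range(num_gpus)])

strategy = strategy_for_num_gpus(2)
with strategy.scope():  # Variables created here are mirrored across GPUs.
  model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
```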
### Running on multiple GPU hosts
You can also train these models on multiple hosts, each with GPUs, using
`tf.distribute.Strategy`.
The easiest way to run multi-host benchmarks is to set the
[`TF_CONFIG`](https://www.tensorflow.org/guide/distributed_training#TF_CONFIG)
appropriately at each host. For example, to run using
`MultiWorkerMirroredStrategy` on 2 hosts, the `cluster` in `TF_CONFIG` should
have 2 `host:port` entries, and host `i` should have the `task` in `TF_CONFIG`
set to `{"type": "worker", "index": i}`. `MultiWorkerMirroredStrategy` will
automatically use all the available GPUs at each host.
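For illustration, host 0 in the 2-host example could set `TF_CONFIG` like this
before launching training (addresses are placeholders):
```python
import json
import os

# Placeholder addresses; replace with your actual host:port pairs.
cluster = {"worker": ["host0:2222", "host1:2222"]}
# On host i, set "index" to i; shown here for host 0.
os.environ["TF_CONFIG"] = json.dumps({
    "cluster": cluster,
    "task": {"type": "worker", "index": 0},
})
```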
### Running on Cloud TPUs
-Note: This model will **not** work with TPUs on Colab.
+Note: These models will **not** work with TPUs on Colab.
-You can train the ResNet CTL model on Cloud TPUs using
+You can train image classification models on Cloud TPUs using
`tf.distribute.TPUStrategy`. If you are not familiar with Cloud TPUs, it is
strongly recommended that you go through the
[quickstart](https://cloud.google.com/tpu/docs/quickstart) to learn how to
create a TPU and GCE VM.
-To run ResNet model on a TPU, you must set `--distribution_strategy=tpu` and
-`--tpu=$TPU_NAME`, where `$TPU_NAME` the name of your TPU in the Cloud Console.
-From a GCE VM, you can run the following command to train ResNet for one epoch
-on a v2-8 or v3-8 TPU:
+## MNIST
+To download the data and run the MNIST sample model locally for the first time,
+run one of the following commands:
```bash
-python resnet/resnet_ctl_imagenet_main.py \
-  --tpu=$TPU_NAME \
+python3 mnist_main.py \
   --model_dir=$MODEL_DIR \
   --data_dir=$DATA_DIR \
-  --batch_size=1024 \
-  --steps_per_loop=500 \
-  --train_epochs=1 \
-  --use_synthetic_data=false \
-  --dtype=fp32 \
-  --enable_eager=true \
-  --enable_tensorboard=true \
-  --distribution_strategy=tpu \
-  --log_steps=50 \
-  --single_l2_loss_op=true \
-  --use_tf_function=true
+  --train_epochs=10 \
+  --distribution_strategy=one_device \
+  --num_gpus=$NUM_GPUS \
+  --download
```
-To train the ResNet to convergence, run it for 90 epochs:
+To train the model on a Cloud TPU, run the following command:
```bash
-python resnet/resnet_ctl_imagenet_main.py \
+python3 mnist_main.py \
   --tpu=$TPU_NAME \
   --model_dir=$MODEL_DIR \
   --data_dir=$DATA_DIR \
-  --batch_size=1024 \
-  --steps_per_loop=500 \
-  --train_epochs=90 \
-  --use_synthetic_data=false \
-  --dtype=fp32 \
-  --enable_eager=true \
-  --enable_tensorboard=true \
+  --train_epochs=10 \
   --distribution_strategy=tpu \
-  --log_steps=50 \
-  --single_l2_loss_op=true \
-  --use_tf_function=true
+  --download
```
-Note: `$MODEL_DIR` and `$DATA_DIR` must be GCS paths.
+Note: the `--download` flag is only required the first time you run the model.
-## MNIST
+## Classifier Trainer
+The classifier trainer is a unified framework for running image classification
+models using Keras's compile/fit methods. Experiments should be provided in
+the form of YAML files; some examples are included in the
+[configs/examples](./configs/examples) folder.
-To download the data and run the MNIST sample model locally for the first time,
-run one of the following command:
+The provided configuration files use a per-replica batch size, which is scaled
+by the number of devices. For instance, if `batch size` = 64, then for 1 GPU
+the global batch size would be 64 * 1 = 64. For 8 GPUs, the global batch size
+would be 64 * 8 = 512. Similarly, for a v3-8 TPU, the global batch size would
+be 64 * 8 = 512, and for a v3-32, the global batch size is 64 * 32 = 2048.
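The scaling rule is a single multiplication; a minimal sketch:
```python
# Global batch size = configured per-replica batch size * number of replicas.
per_replica_batch_size = 64
for num_replicas in (1, 8, 32):  # 1 GPU; 8 GPUs or a v3-8; a v3-32.
  print(num_replicas, per_replica_batch_size * num_replicas)
# -> 1 64, 8 512, 32 2048
```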
+### ResNet50
+#### On GPU:
```bash
-python mnist_main.py \
+python3 classifier_trainer.py \
+  --mode=train_and_eval \
+  --model_type=resnet \
+  --dataset=imagenet \
   --model_dir=$MODEL_DIR \
   --data_dir=$DATA_DIR \
-  --train_epochs=10 \
-  --distribution_strategy=one_device \
-  --num_gpus=$NUM_GPUS \
-  --download
+  --config_file=configs/examples/resnet/imagenet/gpu.yaml \
+  --params_override='runtime.num_gpus=$NUM_GPUS'
```
-To train the model on a Cloud TPU, run the following command:
+#### On TPU:
```bash
+python3 classifier_trainer.py \
+  --mode=train_and_eval \
+  --model_type=resnet \
+  --dataset=imagenet \
   --tpu=$TPU_NAME \
   --model_dir=$MODEL_DIR \
   --data_dir=$DATA_DIR \
+  --config_file=configs/examples/resnet/imagenet/tpu.yaml
```
+### EfficientNet
+**Note: EfficientNet development is a work in progress.**
+#### On GPU:
+```bash
+python3 classifier_trainer.py \
+  --mode=train_and_eval \
+  --model_type=efficientnet \
+  --dataset=imagenet \
+  --model_dir=$MODEL_DIR \
+  --data_dir=$DATA_DIR \
+  --config_file=configs/examples/efficientnet/imagenet/efficientnet-b0-gpu.yaml \
+  --params_override='runtime.num_gpus=$NUM_GPUS'
+```
+#### On TPU:
```bash
-python mnist_main.py \
+python3 classifier_trainer.py \
+  --mode=train_and_eval \
+  --model_type=efficientnet \
+  --dataset=imagenet \
   --tpu=$TPU_NAME \
   --model_dir=$MODEL_DIR \
   --data_dir=$DATA_DIR \
-  --train_epochs=10 \
-  --distribution_strategy=tpu \
-  --download
+  --config_file=configs/examples/efficientnet/imagenet/efficientnet-b0-tpu.yaml
```
-Note: the `--download` flag is only required the first time you run the model.
+Note that the number of GPU devices can be overridden in the command line using
+`--params_override`. The TPU does not need this override, as the device is fixed
+by providing the TPU address or name with the `--tpu` flag.
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""AutoAugment and RandAugment policies for enhanced image preprocessing.
AutoAugment Reference: https://arxiv.org/abs/1805.09501
RandAugment Reference: https://arxiv.org/abs/1909.13719
"""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import math
import tensorflow.compat.v2 as tf
from typing import Any, Dict, Iterable, List, Optional, Text, Tuple, Union
from tensorflow.python.keras.layers.preprocessing import image_preprocessing as image_ops
# This signifies the max integer that the controller RNN could predict for the
# augmentation scheme.
_MAX_LEVEL = 10.
def to_4d(image: tf.Tensor) -> tf.Tensor:
"""Converts an input Tensor to 4 dimensions.
4D image => [N, H, W, C] or [N, C, H, W]
3D image => [1, H, W, C] or [1, C, H, W]
2D image => [1, H, W, 1]
Args:
image: The 2/3/4D input tensor.
Returns:
A 4D image tensor.
Raises:
`TypeError` if `image` is not a 2/3/4D tensor.
"""
shape = tf.shape(image)
original_rank = tf.rank(image)
left_pad = tf.cast(tf.less_equal(original_rank, 3), dtype=tf.int32)
right_pad = tf.cast(tf.equal(original_rank, 2), dtype=tf.int32)
new_shape = tf.concat(
[
tf.ones(shape=left_pad, dtype=tf.int32),
shape,
tf.ones(shape=right_pad, dtype=tf.int32),
],
axis=0,
)
return tf.reshape(image, new_shape)
def from_4d(image: tf.Tensor, ndims: int) -> tf.Tensor:
"""Converts a 4D image back to `ndims` rank."""
shape = tf.shape(image)
begin = tf.cast(tf.less_equal(ndims, 3), dtype=tf.int32)
end = 4 - tf.cast(tf.equal(ndims, 2), dtype=tf.int32)
new_shape = shape[begin:end]
return tf.reshape(image, new_shape)
def _convert_translation_to_transform(
translations: Iterable[int]) -> tf.Tensor:
"""Converts translations to a projective transform.
The translation matrix looks like this:
[[1 0 -dx]
[0 1 -dy]
[0 0 1]]
Args:
translations: The 2-element list representing [dx, dy], or a matrix of
2-element lists representing [dx dy] to translate for each image. The
shape must be static.
Returns:
The transformation matrix of shape (num_images, 8).
Raises:
`TypeError` if
- the shape of `translations` is not known or
- the shape of `translations` is not rank 1 or 2.
"""
translations = tf.convert_to_tensor(translations, dtype=tf.float32)
if translations.get_shape().ndims is None:
raise TypeError('translations rank must be statically known')
elif len(translations.get_shape()) == 1:
translations = translations[None]
elif len(translations.get_shape()) != 2:
raise TypeError('translations should have rank 1 or 2.')
num_translations = tf.shape(translations)[0]
return tf.concat(
values=[
tf.ones((num_translations, 1), tf.dtypes.float32),
tf.zeros((num_translations, 1), tf.dtypes.float32),
-translations[:, 0, None],
tf.zeros((num_translations, 1), tf.dtypes.float32),
tf.ones((num_translations, 1), tf.dtypes.float32),
-translations[:, 1, None],
tf.zeros((num_translations, 2), tf.dtypes.float32),
],
axis=1,
)
def _convert_angles_to_transform(
angles: Union[Iterable[float], float],
image_width: int,
image_height: int) -> tf.Tensor:
"""Converts an angle or angles to a projective transform.
Args:
angles: A scalar to rotate all images, or a vector to rotate a batch of
images. The rank must be statically known (rank 0 or 1).
image_width: The width of the image(s) to be transformed.
image_height: The height of the image(s) to be transformed.
Returns:
A tensor of shape (num_images, 8).
Raises:
`TypeError` if `angles` is not rank 0 or 1.
"""
angles = tf.convert_to_tensor(angles, dtype=tf.float32)
if len(angles.get_shape()) == 0: # pylint:disable=g-explicit-length-test
angles = angles[None]
elif len(angles.get_shape()) != 1:
raise TypeError('Angles should have a rank 0 or 1.')
x_offset = ((image_width - 1) -
(tf.math.cos(angles) * (image_width - 1) - tf.math.sin(angles) *
(image_height - 1))) / 2.0
y_offset = ((image_height - 1) -
(tf.math.sin(angles) * (image_width - 1) + tf.math.cos(angles) *
(image_height - 1))) / 2.0
num_angles = tf.shape(angles)[0]
return tf.concat(
values=[
tf.math.cos(angles)[:, None],
-tf.math.sin(angles)[:, None],
x_offset[:, None],
tf.math.sin(angles)[:, None],
tf.math.cos(angles)[:, None],
y_offset[:, None],
tf.zeros((num_angles, 2), tf.dtypes.float32),
],
axis=1,
)
def transform(image: tf.Tensor,
transforms: Iterable[float]) -> tf.Tensor:
"""Prepares input data for `image_ops.transform`."""
original_ndims = tf.rank(image)
transforms = tf.convert_to_tensor(transforms, dtype=tf.float32)
if len(tf.shape(transforms)) == 1:
transforms = transforms[None]
image = to_4d(image)
image = image_ops.transform(
images=image,
transforms=transforms,
interpolation='nearest')
return from_4d(image, original_ndims)
def translate(image: tf.Tensor,
translations: Iterable[int]) -> tf.Tensor:
"""Translates image(s) by provided vectors.
Args:
image: An image Tensor of type uint8.
translations: A vector or matrix representing [dx dy].
Returns:
The translated version of the image.
"""
transforms = _convert_translation_to_transform(translations)
return transform(image, transforms=transforms)
def rotate(image: tf.Tensor, degrees: float) -> tf.Tensor:
"""Rotates the image by degrees either clockwise or counterclockwise.
Args:
image: An image Tensor of type uint8.
degrees: Float, a scalar angle in degrees to rotate all images by. If
degrees is positive, the image will be rotated clockwise; otherwise it will
be rotated counterclockwise.
Returns:
The rotated version of image.
"""
# Convert from degrees to radians.
degrees_to_radians = math.pi / 180.0
radians = degrees * degrees_to_radians
original_ndims = tf.rank(image)
image = to_4d(image)
image_height = tf.cast(tf.shape(image)[1], tf.float32)
image_width = tf.cast(tf.shape(image)[2], tf.float32)
transforms = _convert_angles_to_transform(angles=radians,
image_width=image_width,
image_height=image_height)
# In practice, we should randomize the rotation degrees by flipping
# it negatively half the time, but that's done on 'degrees' outside
# of the function.
image = transform(image, transforms=transforms)
return from_4d(image, original_ndims)
def blend(image1: tf.Tensor, image2: tf.Tensor, factor: float) -> tf.Tensor:
"""Blend image1 and image2 using 'factor'.
Factor can be above 0.0. A value of 0.0 means only image1 is used.
A value of 1.0 means only image2 is used. A value between 0.0 and
1.0 means we linearly interpolate the pixel values between the two
images. A value greater than 1.0 "extrapolates" the difference
between the two pixel values, and we clip the results to values
between 0 and 255.
Args:
image1: An image Tensor of type uint8.
image2: An image Tensor of type uint8.
factor: A floating point value above 0.0.
Returns:
A blended image Tensor of type uint8.
"""
if factor == 0.0:
return tf.convert_to_tensor(image1)
if factor == 1.0:
return tf.convert_to_tensor(image2)
image1 = tf.cast(image1, tf.float32)
image2 = tf.cast(image2, tf.float32)
difference = image2 - image1
scaled = factor * difference
# Do addition in float.
temp = tf.cast(image1, tf.float32) + scaled
# Interpolate
if factor > 0.0 and factor < 1.0:
# Interpolation means we always stay within 0 and 255.
return tf.cast(temp, tf.uint8)
# Extrapolate:
#
# We need to clip and then cast.
return tf.cast(tf.clip_by_value(temp, 0.0, 255.0), tf.uint8)
def cutout(image: tf.Tensor, pad_size: int, replace: int = 0) -> tf.Tensor:
"""Apply cutout (https://arxiv.org/abs/1708.04552) to image.
This operation applies a (2*pad_size x 2*pad_size) mask of zeros to
a random location within `image`. The pixel values filled in will be of the
value `replace`. The location where the mask will be applied is randomly
chosen uniformly over the whole image.
Args:
image: An image Tensor of type uint8.
pad_size: Specifies how big the generated zero mask applied to the image
will be. The mask will be of size (2*pad_size x 2*pad_size).
replace: What pixel value to fill in the image in the area that has
the cutout mask applied to it.
Returns:
An image Tensor that is of type uint8.
"""
image_height = tf.shape(image)[0]
image_width = tf.shape(image)[1]
# Sample the center location in the image where the zero mask will be applied.
cutout_center_height = tf.random.uniform(
shape=[], minval=0, maxval=image_height,
dtype=tf.int32)
cutout_center_width = tf.random.uniform(
shape=[], minval=0, maxval=image_width,
dtype=tf.int32)
lower_pad = tf.maximum(0, cutout_center_height - pad_size)
upper_pad = tf.maximum(0, image_height - cutout_center_height - pad_size)
left_pad = tf.maximum(0, cutout_center_width - pad_size)
right_pad = tf.maximum(0, image_width - cutout_center_width - pad_size)
cutout_shape = [image_height - (lower_pad + upper_pad),
image_width - (left_pad + right_pad)]
padding_dims = [[lower_pad, upper_pad], [left_pad, right_pad]]
mask = tf.pad(
tf.zeros(cutout_shape, dtype=image.dtype),
padding_dims, constant_values=1)
mask = tf.expand_dims(mask, -1)
mask = tf.tile(mask, [1, 1, 3])
image = tf.where(
tf.equal(mask, 0),
tf.ones_like(image, dtype=image.dtype) * replace,
image)
return image
def solarize(image: tf.Tensor, threshold: int = 128) -> tf.Tensor:
# For each pixel in the image, select the pixel
# if the value is less than the threshold.
# Otherwise, subtract 255 from the pixel.
return tf.where(image < threshold, image, 255 - image)
def solarize_add(image: tf.Tensor,
addition: int = 0,
threshold: int = 128) -> tf.Tensor:
# For each pixel in the image less than threshold
# we add 'addition' amount to it and then clip the
# pixel value to be between 0 and 255. The value
# of 'addition' is between -128 and 128.
added_image = tf.cast(image, tf.int64) + addition
added_image = tf.cast(tf.clip_by_value(added_image, 0, 255), tf.uint8)
return tf.where(image < threshold, added_image, image)
def color(image: tf.Tensor, factor: float) -> tf.Tensor:
"""Equivalent of PIL Color."""
degenerate = tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))
return blend(degenerate, image, factor)
def contrast(image: tf.Tensor, factor: float) -> tf.Tensor:
"""Equivalent of PIL Contrast."""
degenerate = tf.image.rgb_to_grayscale(image)
# Cast before calling tf.histogram.
degenerate = tf.cast(degenerate, tf.int32)
# Compute the grayscale histogram, then compute the mean pixel value,
# and create a constant image size of that value. Use that as the
# blending degenerate target of the original image.
hist = tf.histogram_fixed_width(degenerate, [0, 255], nbins=256)
mean = tf.reduce_sum(tf.cast(hist, tf.float32)) / 256.0
degenerate = tf.ones_like(degenerate, dtype=tf.float32) * mean
degenerate = tf.clip_by_value(degenerate, 0.0, 255.0)
degenerate = tf.image.grayscale_to_rgb(tf.cast(degenerate, tf.uint8))
return blend(degenerate, image, factor)
def brightness(image: tf.Tensor, factor: float) -> tf.Tensor:
"""Equivalent of PIL Brightness."""
degenerate = tf.zeros_like(image)
return blend(degenerate, image, factor)
def posterize(image: tf.Tensor, bits: int) -> tf.Tensor:
"""Equivalent of PIL Posterize."""
shift = 8 - bits
return tf.bitwise.left_shift(tf.bitwise.right_shift(image, shift), shift)
def wrapped_rotate(image: tf.Tensor, degrees: float, replace: int) -> tf.Tensor:
"""Applies rotation with wrap/unwrap."""
image = rotate(wrap(image), degrees=degrees)
return unwrap(image, replace)
def translate_x(image: tf.Tensor, pixels: int, replace: int) -> tf.Tensor:
"""Equivalent of PIL Translate in X dimension."""
image = translate(wrap(image), [-pixels, 0])
return unwrap(image, replace)
def translate_y(image: tf.Tensor, pixels: int, replace: int) -> tf.Tensor:
"""Equivalent of PIL Translate in Y dimension."""
image = translate(wrap(image), [0, -pixels])
return unwrap(image, replace)
def shear_x(image: tf.Tensor, level: float, replace: int) -> tf.Tensor:
"""Equivalent of PIL Shearing in X dimension."""
# Shear parallel to x axis is a projective transform
# with a matrix form of:
# [1 level
# 0 1].
image = transform(image=wrap(image),
transforms=[1., level, 0., 0., 1., 0., 0., 0.])
return unwrap(image, replace)
def shear_y(image: tf.Tensor, level: float, replace: int) -> tf.Tensor:
"""Equivalent of PIL Shearing in Y dimension."""
# Shear parallel to y axis is a projective transform
# with a matrix form of:
# [1 0
# level 1].
image = transform(image=wrap(image),
transforms=[1., 0., 0., level, 1., 0., 0., 0.])
return unwrap(image, replace)
def autocontrast(image: tf.Tensor) -> tf.Tensor:
"""Implements Autocontrast function from PIL using TF ops.
Args:
image: A 3D uint8 tensor.
Returns:
The image after it has had autocontrast applied to it and will be of type
uint8.
"""
def scale_channel(image: tf.Tensor) -> tf.Tensor:
"""Scale the 2D image using the autocontrast rule."""
# A possibly cheaper version can be done using cumsum/unique_with_counts
# over the histogram values, rather than iterating over the entire image
# to compute mins and maxes.
lo = tf.cast(tf.reduce_min(image), tf.float32)
hi = tf.cast(tf.reduce_max(image), tf.float32)
# Scale the image, making the lowest value 0 and the highest value 255.
def scale_values(im):
scale = 255.0 / (hi - lo)
offset = -lo * scale
im = tf.cast(im, tf.float32) * scale + offset
im = tf.clip_by_value(im, 0.0, 255.0)
return tf.cast(im, tf.uint8)
result = tf.cond(hi > lo, lambda: scale_values(image), lambda: image)
return result
# Assumes RGB for now. Scales each channel independently
# and then stacks the result.
s1 = scale_channel(image[:, :, 0])
s2 = scale_channel(image[:, :, 1])
s3 = scale_channel(image[:, :, 2])
image = tf.stack([s1, s2, s3], 2)
return image
def sharpness(image: tf.Tensor, factor: float) -> tf.Tensor:
"""Implements Sharpness function from PIL using TF ops."""
orig_image = image
image = tf.cast(image, tf.float32)
# Make image 4D for conv operation.
image = tf.expand_dims(image, 0)
# SMOOTH PIL Kernel.
kernel = tf.constant(
[[1, 1, 1], [1, 5, 1], [1, 1, 1]], dtype=tf.float32,
shape=[3, 3, 1, 1]) / 13.
# Tile across channel dimension.
kernel = tf.tile(kernel, [1, 1, 3, 1])
strides = [1, 1, 1, 1]
degenerate = tf.nn.depthwise_conv2d(
image, kernel, strides, padding='VALID', dilations=[1, 1])
degenerate = tf.clip_by_value(degenerate, 0.0, 255.0)
degenerate = tf.squeeze(tf.cast(degenerate, tf.uint8), [0])
# For the borders of the resulting image, fill in the values of the
# original image.
mask = tf.ones_like(degenerate)
padded_mask = tf.pad(mask, [[1, 1], [1, 1], [0, 0]])
padded_degenerate = tf.pad(degenerate, [[1, 1], [1, 1], [0, 0]])
result = tf.where(tf.equal(padded_mask, 1), padded_degenerate, orig_image)
# Blend the final result.
return blend(result, orig_image, factor)
def equalize(image: tf.Tensor) -> tf.Tensor:
"""Implements Equalize function from PIL using TF ops."""
def scale_channel(im, c):
"""Scale the data in the channel to implement equalize."""
im = tf.cast(im[:, :, c], tf.int32)
# Compute the histogram of the image channel.
histo = tf.histogram_fixed_width(im, [0, 255], nbins=256)
# For the purposes of computing the step, filter out the nonzeros.
nonzero = tf.where(tf.not_equal(histo, 0))
nonzero_histo = tf.reshape(tf.gather(histo, nonzero), [-1])
step = (tf.reduce_sum(nonzero_histo) - nonzero_histo[-1]) // 255
def build_lut(histo, step):
# Compute the cumulative sum, shifting by step // 2
# and then normalization by step.
lut = (tf.cumsum(histo) + (step // 2)) // step
# Shift lut, prepending with 0.
lut = tf.concat([[0], lut[:-1]], 0)
# Clip the counts to be in range. This is done
# in the C code for image.point.
return tf.clip_by_value(lut, 0, 255)
# If step is zero, return the original image. Otherwise, build
# lut from the full histogram and step and then index from it.
result = tf.cond(tf.equal(step, 0),
lambda: im,
lambda: tf.gather(build_lut(histo, step), im))
return tf.cast(result, tf.uint8)
# Assumes RGB for now. Scales each channel independently
# and then stacks the result.
s1 = scale_channel(image, 0)
s2 = scale_channel(image, 1)
s3 = scale_channel(image, 2)
image = tf.stack([s1, s2, s3], 2)
return image
def invert(image: tf.Tensor) -> tf.Tensor:
"""Inverts the image pixels."""
image = tf.convert_to_tensor(image)
return 255 - image
def wrap(image: tf.Tensor) -> tf.Tensor:
"""Returns 'image' with an extra channel set to all 1s."""
shape = tf.shape(image)
extended_channel = tf.ones([shape[0], shape[1], 1], image.dtype)
extended = tf.concat([image, extended_channel], axis=2)
return extended
def unwrap(image: tf.Tensor, replace: int) -> tf.Tensor:
"""Unwraps an image produced by wrap.
Where there is a 0 in the last channel for every spatial position,
the rest of the three channels in that spatial dimension are grayed
(set to 128). Operations like translate and shear on a wrapped
Tensor will leave 0s in empty locations. Some transformations look
at the intensity of values to do preprocessing, and we want these
empty pixels to assume the 'average' value, rather than pure black.
Args:
image: A 3D Image Tensor with 4 channels.
replace: A one or three value 1D tensor to fill empty pixels.
Returns:
image: A 3D image Tensor with 3 channels.
"""
image_shape = tf.shape(image)
# Flatten the spatial dimensions.
flattened_image = tf.reshape(image, [-1, image_shape[2]])
# Find all pixels where the last channel is zero.
alpha_channel = tf.expand_dims(flattened_image[:, 3], axis=-1)
replace = tf.concat([replace, tf.ones([1], image.dtype)], 0)
# Where they are zero, fill them in with 'replace'.
flattened_image = tf.where(
tf.equal(alpha_channel, 0),
tf.ones_like(flattened_image, dtype=image.dtype) * replace,
flattened_image)
image = tf.reshape(flattened_image, image_shape)
image = tf.slice(image, [0, 0, 0], [image_shape[0], image_shape[1], 3])
return image
def _randomly_negate_tensor(tensor: tf.Tensor) -> tf.Tensor:
"""With 50% prob turn the tensor negative."""
should_flip = tf.cast(tf.floor(tf.random.uniform([]) + 0.5), tf.bool)
final_tensor = tf.cond(should_flip, lambda: tensor, lambda: -tensor)
return final_tensor
def _rotate_level_to_arg(level: float):
level = (level/_MAX_LEVEL) * 30.
level = _randomly_negate_tensor(level)
return (level,)
def _shrink_level_to_arg(level: float):
"""Converts level to ratio by which we shrink the image content."""
if level == 0:
return (1.0,) # if level is zero, do not shrink the image
# Maximum shrinking ratio is 2.9.
level = 2. / (_MAX_LEVEL / level) + 0.9
return (level,)
def _enhance_level_to_arg(level: float):
return ((level/_MAX_LEVEL) * 1.8 + 0.1,)
def _shear_level_to_arg(level: float):
level = (level/_MAX_LEVEL) * 0.3
# Flip level to negative with 50% chance.
level = _randomly_negate_tensor(level)
return (level,)
def _translate_level_to_arg(level: float, translate_const: float):
level = (level/_MAX_LEVEL) * float(translate_const)
# Flip level to negative with 50% chance.
level = _randomly_negate_tensor(level)
return (level,)
def _mult_to_arg(level: float, multiplier: float = 1.):
return (int((level / _MAX_LEVEL) * multiplier),)
def _apply_func_with_prob(func: Any,
image: tf.Tensor,
args: Any,
prob: float):
"""Apply `func` to image w/ `args` as input with probability `prob`."""
assert isinstance(args, tuple)
# Apply the function with probability `prob`.
should_apply_op = tf.cast(
tf.floor(tf.random.uniform([], dtype=tf.float32) + prob), tf.bool)
augmented_image = tf.cond(
should_apply_op,
lambda: func(image, *args),
lambda: image)
return augmented_image
def select_and_apply_random_policy(policies: Any, image: tf.Tensor):
"""Select a random policy from `policies` and apply it to `image`."""
policy_to_select = tf.random.uniform([], maxval=len(policies), dtype=tf.int32)
# Note that using tf.case instead of tf.conds would result in significantly
# larger graphs and would even break export for some larger policies.
for (i, policy) in enumerate(policies):
image = tf.cond(
tf.equal(i, policy_to_select),
lambda selected_policy=policy: selected_policy(image),
lambda: image)
return image
NAME_TO_FUNC = {
'AutoContrast': autocontrast,
'Equalize': equalize,
'Invert': invert,
'Rotate': wrapped_rotate,
'Posterize': posterize,
'Solarize': solarize,
'SolarizeAdd': solarize_add,
'Color': color,
'Contrast': contrast,
'Brightness': brightness,
'Sharpness': sharpness,
'ShearX': shear_x,
'ShearY': shear_y,
'TranslateX': translate_x,
'TranslateY': translate_y,
'Cutout': cutout,
}
# Functions that have a 'replace' parameter
REPLACE_FUNCS = frozenset({
'Rotate',
'TranslateX',
'ShearX',
'ShearY',
'TranslateY',
'Cutout',
})
def level_to_arg(cutout_const: float, translate_const: float):
"""Creates a dict mapping image operation names to their arguments."""
no_arg = lambda level: ()
posterize_arg = lambda level: _mult_to_arg(level, 4)
solarize_arg = lambda level: _mult_to_arg(level, 256)
solarize_add_arg = lambda level: _mult_to_arg(level, 110)
cutout_arg = lambda level: _mult_to_arg(level, cutout_const)
translate_arg = lambda level: _translate_level_to_arg(level, translate_const)
args = {
'AutoContrast': no_arg,
'Equalize': no_arg,
'Invert': no_arg,
'Rotate': _rotate_level_to_arg,
'Posterize': posterize_arg,
'Solarize': solarize_arg,
'SolarizeAdd': solarize_add_arg,
'Color': _enhance_level_to_arg,
'Contrast': _enhance_level_to_arg,
'Brightness': _enhance_level_to_arg,
'Sharpness': _enhance_level_to_arg,
'ShearX': _shear_level_to_arg,
'ShearY': _shear_level_to_arg,
'Cutout': cutout_arg,
'TranslateX': translate_arg,
'TranslateY': translate_arg,
}
return args
def _parse_policy_info(name: Text,
prob: float,
level: float,
replace_value: List[int],
cutout_const: float,
translate_const: float) -> Tuple[Any, float, Any]:
"""Return the function that corresponds to `name` and update `level` param."""
func = NAME_TO_FUNC[name]
args = level_to_arg(cutout_const, translate_const)[name](level)
if name in REPLACE_FUNCS:
# Add in replace arg if it is required for the function that is called.
args = tuple(list(args) + [replace_value])
return func, prob, args
class ImageAugment(object):
"""Image augmentation class for applying image distortions."""
def distort(self, image: tf.Tensor) -> tf.Tensor:
"""Given an image tensor, returns a distorted image with the same shape.
Args:
image: `Tensor` of shape [height, width, 3] representing an image.
Returns:
The augmented version of `image`.
"""
raise NotImplementedError()
class AutoAugment(ImageAugment):
"""Applies the AutoAugment policy to images.
AutoAugment is from the paper: https://arxiv.org/abs/1805.09501.
"""
def __init__(self,
augmentation_name: Text = 'v0',
policies: Optional[Dict[Text, Any]] = None,
cutout_const: float = 100,
translate_const: float = 250):
"""Applies the AutoAugment policy to images.
Args:
augmentation_name: The name of the AutoAugment policy to use. The
available options are `v0`, `test` and `simple`. `v0` is the policy used for all
of the results in the paper and was found to achieve the best results on
the COCO dataset. `v1`, `v2` and `v3` are additional good policies found
on the COCO dataset that have slight variation in what operations were
used during the search procedure along with how many operations are
applied in parallel to a single image (2 vs 3).
policies: list of lists of tuples in the form `(func, prob, level)`,
`func` is a string name of the augmentation function, `prob` is the
probability of applying the `func` operation, `level` is the input
argument for `func`.
cutout_const: multiplier for applying cutout.
translate_const: multiplier for applying translation.
"""
super(AutoAugment, self).__init__()
if policies is None:
self.available_policies = {
'v0': self.policy_v0(),
'test': self.policy_test(),
'simple': self.policy_simple(),
}
if augmentation_name not in self.available_policies:
raise ValueError(
'Invalid augmentation_name: {}'.format(augmentation_name))
self.augmentation_name = augmentation_name
self.policies = self.available_policies[augmentation_name]
self.cutout_const = float(cutout_const)
self.translate_const = float(translate_const)
def distort(self, image: tf.Tensor) -> tf.Tensor:
"""Applies the AutoAugment policy to `image`.
AutoAugment is from the paper: https://arxiv.org/abs/1805.09501.
Args:
image: `Tensor` of shape [height, width, 3] representing an image.
Returns:
A version of image that now has data augmentation applied to it based on
the `policies` passed into the function.
"""
input_image_type = image.dtype
if input_image_type != tf.uint8:
image = tf.clip_by_value(image, 0.0, 255.0)
image = tf.cast(image, dtype=tf.uint8)
replace_value = [128] * 3
# func is the string name of the augmentation function, prob is the
# probability of applying the operation and level is the parameter
# associated with the tf op.
# tf_policies are functions that take in an image and return an augmented
# image.
tf_policies = []
for policy in self.policies:
tf_policy = []
# Link string name to the correct python function and make sure the
# correct argument is passed into that function.
for policy_info in policy:
policy_info = list(policy_info) + [
replace_value, self.cutout_const, self.translate_const
]
tf_policy.append(_parse_policy_info(*policy_info))
# Now build the tf policy that will apply the augmentation procedure
# on image.
def make_final_policy(tf_policy_):
def final_policy(image_):
for func, prob, args in tf_policy_:
image_ = _apply_func_with_prob(func, image_, args, prob)
return image_
return final_policy
tf_policies.append(make_final_policy(tf_policy))
image = select_and_apply_random_policy(tf_policies, image)
image = tf.cast(image, dtype=input_image_type)
return image
@staticmethod
def policy_v0():
"""Autoaugment policy that was used in AutoAugment Paper.
Each tuple is an augmentation operation of the form
(operation, probability, magnitude). Each element in policy is a
sub-policy that will be applied sequentially on the image.
Returns:
the policy.
"""
# TODO(dankondratyuk): tensorflow_addons defines custom ops, which
# for some reason are not included when building/linking
# This results in the error, "Op type not registered
# 'Addons>ImageProjectiveTransformV2' in binary" when running on borg TPUs
policy = [
[('Equalize', 0.8, 1), ('ShearY', 0.8, 4)],
[('Color', 0.4, 9), ('Equalize', 0.6, 3)],
[('Color', 0.4, 1), ('Rotate', 0.6, 8)],
[('Solarize', 0.8, 3), ('Equalize', 0.4, 7)],
[('Solarize', 0.4, 2), ('Solarize', 0.6, 2)],
[('Color', 0.2, 0), ('Equalize', 0.8, 8)],
[('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)],
[('ShearX', 0.2, 9), ('Rotate', 0.6, 8)],
[('Color', 0.6, 1), ('Equalize', 1.0, 2)],
[('Invert', 0.4, 9), ('Rotate', 0.6, 0)],
[('Equalize', 1.0, 9), ('ShearY', 0.6, 3)],
[('Color', 0.4, 7), ('Equalize', 0.6, 0)],
[('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)],
[('Solarize', 0.6, 8), ('Color', 0.6, 9)],
[('Solarize', 0.2, 4), ('Rotate', 0.8, 9)],
[('Rotate', 1.0, 7), ('TranslateY', 0.8, 9)],
[('ShearX', 0.0, 0), ('Solarize', 0.8, 4)],
[('ShearY', 0.8, 0), ('Color', 0.6, 4)],
[('Color', 1.0, 0), ('Rotate', 0.6, 2)],
[('Equalize', 0.8, 4), ('Equalize', 0.0, 8)],
[('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)],
[('ShearY', 0.4, 7), ('SolarizeAdd', 0.6, 7)],
[('Posterize', 0.8, 2), ('Solarize', 0.6, 10)],
[('Solarize', 0.6, 8), ('Equalize', 0.6, 1)],
[('Color', 0.8, 6), ('Rotate', 0.4, 5)],
]
return policy
@staticmethod
def policy_simple():
"""Same as `policy_v0`, except with custom ops removed."""
policy = [
[('Color', 0.4, 9), ('Equalize', 0.6, 3)],
[('Solarize', 0.8, 3), ('Equalize', 0.4, 7)],
[('Solarize', 0.4, 2), ('Solarize', 0.6, 2)],
[('Color', 0.2, 0), ('Equalize', 0.8, 8)],
[('Equalize', 0.4, 8), ('SolarizeAdd', 0.8, 3)],
[('Color', 0.6, 1), ('Equalize', 1.0, 2)],
[('Color', 0.4, 7), ('Equalize', 0.6, 0)],
[('Posterize', 0.4, 6), ('AutoContrast', 0.4, 7)],
[('Solarize', 0.6, 8), ('Color', 0.6, 9)],
[('Equalize', 0.8, 4), ('Equalize', 0.0, 8)],
[('Equalize', 1.0, 4), ('AutoContrast', 0.6, 2)],
[('Posterize', 0.8, 2), ('Solarize', 0.6, 10)],
[('Solarize', 0.6, 8), ('Equalize', 0.6, 1)],
]
return policy
@staticmethod
def policy_test():
"""Autoaugment test policy for debugging."""
policy = [
[('TranslateX', 1.0, 4), ('Equalize', 1.0, 10)],
]
return policy
class RandAugment(ImageAugment):
"""Applies the RandAugment policy to images.
RandAugment is from the paper https://arxiv.org/abs/1909.13719.
"""
def __init__(self,
num_layers: int = 2,
magnitude: float = 10.,
cutout_const: float = 40.,
translate_const: float = 100.):
"""Applies the RandAugment policy to images.
Args:
num_layers: Integer, the number of augmentation transformations to apply
sequentially to an image. Represented as (N) in the paper. Usually best
values will be in the range [1, 3].
magnitude: Integer, shared magnitude across all augmentation operations.
Represented as (M) in the paper. Usually best values are in the range
[5, 10].
cutout_const: multiplier for applying cutout.
translate_const: multiplier for applying translation.
"""
super(RandAugment, self).__init__()
self.num_layers = num_layers
self.magnitude = float(magnitude)
self.cutout_const = float(cutout_const)
self.translate_const = float(translate_const)
self.available_ops = [
'AutoContrast', 'Equalize', 'Invert', 'Rotate', 'Posterize', 'Solarize',
'Color', 'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'ShearY',
'TranslateX', 'TranslateY', 'Cutout', 'SolarizeAdd'
]
def distort(self, image: tf.Tensor) -> tf.Tensor:
"""Applies the RandAugment policy to `image`.
Args:
image: `Tensor` of shape [height, width, 3] representing an image.
Returns:
The augmented version of `image`.
"""
input_image_type = image.dtype
if input_image_type != tf.uint8:
image = tf.clip_by_value(image, 0.0, 255.0)
image = tf.cast(image, dtype=tf.uint8)
replace_value = [128] * 3
min_prob, max_prob = 0.2, 0.8
for _ in range(self.num_layers):
op_to_select = tf.random.uniform(
[], maxval=len(self.available_ops) + 1, dtype=tf.int32)
branch_fns = []
for (i, op_name) in enumerate(self.available_ops):
prob = tf.random.uniform([],
minval=min_prob,
maxval=max_prob,
dtype=tf.float32)
func, _, args = _parse_policy_info(op_name,
prob,
self.magnitude,
replace_value,
self.cutout_const,
self.translate_const)
branch_fns.append((
i,
# pylint:disable=g-long-lambda
lambda selected_func=func, selected_args=args: selected_func(
image, *selected_args)))
# pylint:enable=g-long-lambda
image = tf.switch_case(branch_index=op_to_select,
branch_fns=branch_fns,
default=lambda: tf.identity(image))
image = tf.cast(image, dtype=input_image_type)
return image
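A brief usage sketch of the two augmenters defined above, mirroring the unit
tests that follow (an illustration, not part of the source file):
import tensorflow.compat.v2 as tf
from official.vision.image_classification import augment
image = tf.zeros((224, 224, 3), dtype=tf.uint8)
autoaug = augment.AutoAugment(augmentation_name='v0')  # The paper's v0 policy.
randaug = augment.RandAugment(num_layers=2, magnitude=10)
distorted = randaug.distort(autoaug.distort(image))  # Still (224, 224, 3) uint8.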
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for autoaugment."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
from absl.testing import parameterized
import tensorflow.compat.v2 as tf
from official.vision.image_classification import augment
def get_dtype_test_cases():
return [
('uint8', tf.uint8),
('int32', tf.int32),
('float16', tf.float16),
('float32', tf.float32),
]
@parameterized.named_parameters(get_dtype_test_cases())
class TransformsTest(parameterized.TestCase, tf.test.TestCase):
"""Basic tests for fundamental transformations."""
def test_to_from_4d(self, dtype):
for shape in [(10, 10), (10, 10, 10), (10, 10, 10, 10)]:
original_ndims = len(shape)
image = tf.zeros(shape, dtype=dtype)
image_4d = augment.to_4d(image)
self.assertEqual(4, tf.rank(image_4d))
self.assertAllEqual(image, augment.from_4d(image_4d, original_ndims))
def test_transform(self, dtype):
image = tf.constant([[1, 2], [3, 4]], dtype=dtype)
self.assertAllEqual(augment.transform(image, transforms=[1]*8),
[[4, 4], [4, 4]])
def disable_test_translate(self, dtype):
image = tf.constant(
[[1, 0, 1, 0], [0, 1, 0, 1], [1, 0, 1, 0], [0, 1, 0, 1]],
dtype=dtype)
translations = [-1, -1]
translated = augment.translate(image=image,
translations=translations)
expected = [[1, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0], [0, 0, 0, 0]]
self.assertAllEqual(translated, expected)
def test_translate_shapes(self, dtype):
translation = [0, 0]
for shape in [(3, 3), (5, 5), (224, 224, 3)]:
image = tf.zeros(shape, dtype=dtype)
self.assertAllEqual(image, augment.translate(image, translation))
def test_translate_invalid_translation(self, dtype):
image = tf.zeros((1, 1), dtype=dtype)
invalid_translation = [[[1, 1]]]
with self.assertRaisesRegex(TypeError, 'rank 1 or 2'):
_ = augment.translate(image, invalid_translation)
def test_rotate(self, dtype):
image = tf.reshape(tf.cast(tf.range(9), dtype), (3, 3))
rotation = 90.
transformed = augment.rotate(image=image, degrees=rotation)
expected = [[2, 5, 8],
[1, 4, 7],
[0, 3, 6]]
self.assertAllEqual(transformed, expected)
def test_rotate_shapes(self, dtype):
degrees = 0.
for shape in [(3, 3), (5, 5), (224, 224, 3)]:
image = tf.zeros(shape, dtype=dtype)
self.assertAllEqual(image, augment.rotate(image, degrees))
class AutoaugmentTest(tf.test.TestCase):
def test_autoaugment(self):
"""Smoke test to be sure there are no syntax errors."""
image = tf.zeros((224, 224, 3), dtype=tf.uint8)
augmenter = augment.AutoAugment()
aug_image = augmenter.distort(image)
self.assertEqual((224, 224, 3), aug_image.shape)
def test_randaug(self):
"""Smoke test to be sure there are no syntax errors."""
image = tf.zeros((224, 224, 3), dtype=tf.uint8)
augmenter = augment.RandAugment()
aug_image = augmenter.distort(image)
self.assertEqual((224, 224, 3), aug_image.shape)
def test_all_policy_ops(self):
"""Smoke test to be sure all augmentation functions can execute."""
prob = 1
magnitude = 10
replace_value = [128] * 3
cutout_const = 100
translate_const = 250
image = tf.ones((224, 224, 3), dtype=tf.uint8)
for op_name in augment.NAME_TO_FUNC:
func, _, args = augment._parse_policy_info(op_name,
prob,
magnitude,
replace_value,
cutout_const,
translate_const)
image = func(image, *args)
self.assertEqual((224, 224, 3), image.shape)
if __name__ == '__main__':
assert tf.version.VERSION.startswith('2.')
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common modules for callbacks."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import os
from typing import Any, List, MutableMapping, Optional, Text
from absl import logging
import tensorflow as tf
def get_callbacks(model_checkpoint: bool = True,
include_tensorboard: bool = True,
track_lr: bool = True,
write_model_weights: bool = True,
initial_step: int = 0,
                  model_dir: Optional[Text] = None
                  ) -> List[tf.keras.callbacks.Callback]:
"""Get all callbacks."""
model_dir = model_dir or ''
callbacks = []
if model_checkpoint:
ckpt_full_path = os.path.join(model_dir, 'model.ckpt-{epoch:04d}')
callbacks.append(tf.keras.callbacks.ModelCheckpoint(
ckpt_full_path, save_weights_only=True, verbose=1))
if include_tensorboard:
callbacks.append(CustomTensorBoard(
log_dir=model_dir,
track_lr=track_lr,
initial_step=initial_step,
write_images=write_model_weights))
return callbacks
def get_scalar_from_tensor(t: tf.Tensor) -> float:
"""Utility function to convert a Tensor to a scalar."""
t = tf.keras.backend.get_value(t)
if callable(t):
return t()
else:
return t
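# For example (illustrative only): a learning rate stored in a tf.Variable
# resolves to its Python scalar value, while a schedule must first be called
# by _calculate_lr below.
#   get_scalar_from_tensor(tf.Variable(0.01))  # ~0.01 (float32 precision)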
class CustomTensorBoard(tf.keras.callbacks.TensorBoard):
"""A customized TensorBoard callback that tracks additional datapoints.
Metrics tracked:
- Global learning rate
Attributes:
log_dir: the path of the directory where to save the log files to be
parsed by TensorBoard.
track_lr: `bool`, whether or not to track the global learning rate.
initial_step: the initial step, used for preemption recovery.
**kwargs: Additional arguments for backwards compatibility. Possible key
is `period`.
"""
# TODO(b/146499062): track params, flops, log lr, l2 loss,
# classification loss
def __init__(self,
log_dir: Text,
track_lr: bool = False,
initial_step: int = 0,
**kwargs):
super(CustomTensorBoard, self).__init__(log_dir=log_dir, **kwargs)
self.step = initial_step
self._track_lr = track_lr
  def on_batch_begin(self,
                     batch: int,
                     logs: MutableMapping[Text, Any] = None) -> None:
    self.step += 1
    if logs is None:
      logs = {}
    logs.update(self._calculate_metrics())
    super(CustomTensorBoard, self).on_batch_begin(batch, logs)
def on_epoch_begin(self,
epoch: int,
logs: MutableMapping[Text, Any] = None) -> None:
if logs is None:
logs = {}
metrics = self._calculate_metrics()
logs.update(metrics)
for k, v in metrics.items():
logging.info('Current %s: %f', k, v)
super(CustomTensorBoard, self).on_epoch_begin(epoch, logs)
def on_epoch_end(self,
epoch: int,
logs: MutableMapping[Text, Any] = None) -> None:
if logs is None:
logs = {}
metrics = self._calculate_metrics()
logs.update(metrics)
super(CustomTensorBoard, self).on_epoch_end(epoch, logs)
def _calculate_metrics(self) -> MutableMapping[Text, Any]:
logs = {}
if self._track_lr:
logs['learning_rate'] = self._calculate_lr()
return logs
  def _calculate_lr(self) -> float:
"""Calculates the learning rate given the current step."""
lr = self._get_base_optimizer().lr
if callable(lr):
lr = lr(self.step)
return get_scalar_from_tensor(lr)
def _get_base_optimizer(self) -> tf.keras.optimizers.Optimizer:
"""Get the base optimizer used by the current model."""
optimizer = self.model.optimizer
# The optimizer might be wrapped by another class, so unwrap it
while hasattr(optimizer, '_optimizer'):
optimizer = optimizer._optimizer # pylint:disable=protected-access
return optimizer
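# Illustrative sketch (not part of the original file): wiring the callbacks
# into Keras training. `model` and `train_ds` are hypothetical placeholders,
# and the model_dir path is an assumption.
def _example_fit_with_callbacks(model: tf.keras.Model,
                                train_ds: tf.data.Dataset) -> None:
  """Trains with checkpointing plus the learning-rate-tracking TensorBoard."""
  cbs = get_callbacks(model_checkpoint=True,
                      include_tensorboard=True,
                      track_lr=True,
                      write_model_weights=False,
                      initial_step=0,
                      model_dir='/tmp/example_model_dir')
  model.fit(train_ds, epochs=1, steps_per_epoch=10, callbacks=cbs)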
# Lint as: python3
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs an Image Classification model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import pprint
from typing import Any, Mapping, Optional, Text, Tuple
from absl import app
from absl import flags
from absl import logging
import tensorflow.compat.v2 as tf
from official.modeling import performance
from official.modeling.hyperparams import params_dict
from official.utils import hyperparams_flags
from official.utils.logs import logger
from official.utils.misc import distribution_utils
from official.utils.misc import keras_utils
from official.vision.image_classification import callbacks as custom_callbacks
from official.vision.image_classification import dataset_factory
from official.vision.image_classification import optimizer_factory
from official.vision.image_classification.configs import base_configs
from official.vision.image_classification.configs import configs
from official.vision.image_classification.efficientnet import efficientnet_model
from official.vision.image_classification.resnet import common
from official.vision.image_classification.resnet import resnet_model
MODELS = {
'efficientnet': efficientnet_model.EfficientNet.from_name,
'resnet': resnet_model.resnet50,
}
def _get_metrics(one_hot: bool) -> Mapping[Text, Any]:
"""Get a dict of available metrics to track."""
if one_hot:
return {
# (name, metric_fn)
'acc': tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
'accuracy': tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
'top_1': tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
'top_5': tf.keras.metrics.TopKCategoricalAccuracy(
k=5,
name='top_5_accuracy'),
}
else:
return {
# (name, metric_fn)
'acc': tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
'accuracy': tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
'top_1': tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
'top_5': tf.keras.metrics.SparseTopKCategoricalAccuracy(
k=5,
name='top_5_accuracy'),
}
def get_image_size_from_model(
params: base_configs.ExperimentConfig) -> Optional[int]:
"""If the given model has a preferred image size, return it."""
if params.model_name == 'efficientnet':
efficientnet_name = params.model.model_params.model_name
if efficientnet_name in efficientnet_model.MODEL_CONFIGS:
return efficientnet_model.MODEL_CONFIGS[efficientnet_name].resolution
return None
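# Illustrative sketch (not part of the original file): EfficientNet variants
# carry a native resolution while ResNet does not, so callers fall back to
# the dataset's configured image size (see _get_dataset_builders below).
def _example_resolve_image_size(params: base_configs.ExperimentConfig,
                                dataset_image_size: int) -> int:
  """Returns the model-preferred image size, else the dataset's size."""
  return get_image_size_from_model(params) or dataset_image_size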
def _get_dataset_builders(params: base_configs.ExperimentConfig,
strategy: tf.distribute.Strategy,
one_hot: bool
) -> Tuple[Any, Any, Any]:
"""Create and return train, validation, and test dataset builders."""
if one_hot:
logging.warning('label_smoothing > 0, so datasets will be one hot encoded.')
else:
logging.warning('label_smoothing not applied, so datasets will not be one '
'hot encoded.')
num_devices = strategy.num_replicas_in_sync
image_size = get_image_size_from_model(params)
dataset_configs = [
params.train_dataset, params.validation_dataset, params.test_dataset
]
builders = []
for config in dataset_configs:
if config is not None and config.has_data:
builder = dataset_factory.DatasetBuilder(
config,
image_size=image_size or config.image_size,
num_devices=num_devices,
one_hot=one_hot)
else:
builder = None
builders.append(builder)
return builders
def get_loss_scale(params: base_configs.ExperimentConfig,
fp16_default: float = 128.) -> float:
"""Returns the loss scale for initializations."""
loss_scale = params.model.loss.loss_scale
if loss_scale == 'dynamic':
return loss_scale
elif loss_scale is not None:
return float(loss_scale)
elif params.train_dataset.dtype == 'float32':
return 1.
else:
assert params.train_dataset.dtype == 'float16'
return fp16_default
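# Illustrative checks (not part of the original file) of the resolution order
# above; the minimal configs constructed here mirror the unit tests.
def _example_loss_scale_resolution() -> None:
  """Exercises the four branches: dynamic, explicit, float32, float16."""
  cases = [('dynamic', 'float16', 'dynamic'),
           (128., 'float16', 128.),
           (None, 'float32', 1.),
           (None, 'float16', 128.)]
  for loss_scale, dtype, expected in cases:
    config = base_configs.ExperimentConfig(
        model=base_configs.ModelConfig(
            loss=base_configs.LossConfig(loss_scale=loss_scale)),
        train_dataset=dataset_factory.DatasetConfig(dtype=dtype))
    assert get_loss_scale(config, fp16_default=128.) == expected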
def _get_params_from_flags(flags_obj: flags.FlagValues):
"""Get ParamsDict from flags."""
model = flags_obj.model_type.lower()
dataset = flags_obj.dataset.lower()
params = configs.get_config(model=model, dataset=dataset)
flags_overrides = {
'model_dir': flags_obj.model_dir,
'mode': flags_obj.mode,
'model': {
'name': model,
},
'runtime': {
'enable_eager': flags_obj.enable_eager,
'tpu': flags_obj.tpu,
},
'train_dataset': {
'data_dir': flags_obj.data_dir,
},
'validation_dataset': {
'data_dir': flags_obj.data_dir,
},
'test_dataset': {
'data_dir': flags_obj.data_dir,
},
}
overriding_configs = (flags_obj.config_file,
flags_obj.params_override,
flags_overrides)
pp = pprint.PrettyPrinter()
logging.info('Base params: %s', pp.pformat(params.as_dict()))
for param in overriding_configs:
logging.info('Overriding params: %s', param)
# Set is_strict to false because we can have dynamic dict parameters.
params = params_dict.override_params_dict(params, param, is_strict=False)
params.validate()
params.lock()
logging.info('Final model parameters: %s', pp.pformat(params.as_dict()))
return params
def resume_from_checkpoint(model: tf.keras.Model,
model_dir: str,
train_steps: int) -> int:
"""Resumes from the latest checkpoint, if possible.
Loads the model weights and optimizer settings from a checkpoint.
This function should be used in case of preemption recovery.
Args:
model: The model whose weights should be restored.
model_dir: The directory where model weights were saved.
    train_steps: The number of training steps per epoch, used to infer the
      epoch from the optimizer's iteration count.
Returns:
The epoch of the latest checkpoint, or 0 if not restoring.
"""
logging.info('Load from checkpoint is enabled.')
latest_checkpoint = tf.train.latest_checkpoint(model_dir)
logging.info('latest_checkpoint: %s', latest_checkpoint)
if not latest_checkpoint:
logging.info('No checkpoint detected.')
return 0
logging.info('Checkpoint file %s found and restoring from '
'checkpoint', latest_checkpoint)
model.load_weights(latest_checkpoint)
initial_epoch = model.optimizer.iterations // train_steps
logging.info('Completed loading from checkpoint.')
logging.info('Resuming from epoch %d', initial_epoch)
return int(initial_epoch)
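# For example (illustrative only): with train_steps=10 steps per epoch and a
# restored optimizer at iteration 25, training resumes at epoch 25 // 10 = 2;
# partial progress within an epoch is rounded down and replayed.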
def initialize(params: base_configs.ExperimentConfig):
"""Initializes backend related initializations."""
keras_utils.set_session_config(
enable_eager=params.runtime.enable_eager,
enable_xla=params.runtime.enable_xla)
if params.runtime.gpu_threads_enabled:
keras_utils.set_gpu_thread_mode_and_count(
per_gpu_thread_count=params.runtime.per_gpu_thread_count,
gpu_thread_mode=params.runtime.gpu_thread_mode,
num_gpus=params.runtime.num_gpus,
datasets_num_private_threads=params.runtime.dataset_num_private_threads)
dataset = params.train_dataset or params.validation_dataset
performance.set_mixed_precision_policy(dataset.dtype)
if dataset.data_format:
data_format = dataset.data_format
elif tf.config.list_physical_devices('GPU'):
data_format = 'channels_first'
else:
data_format = 'channels_last'
tf.keras.backend.set_image_data_format(data_format)
distribution_utils.configure_cluster(
params.runtime.worker_hosts,
params.runtime.task_index)
if params.runtime.enable_eager:
# Enable eager execution to allow step-by-step debugging
tf.config.experimental_run_functions_eagerly(True)
def define_classifier_flags():
"""Defines common flags for image classification."""
hyperparams_flags.initialize_common_flags()
flags.DEFINE_string(
'data_dir',
default=None,
help='The location of the input data.')
flags.DEFINE_string(
'mode',
default=None,
      help='Mode to run: `train_and_eval` or `export_only`.')
flags.DEFINE_bool(
'enable_eager',
default=None,
help='Use eager execution and disable autograph for debugging.')
flags.DEFINE_string(
'model_type',
default=None,
      help='The type of the model, e.g. `efficientnet` or `resnet`.')
flags.DEFINE_string(
'dataset',
default=None,
      help='The name of the dataset, e.g. `imagenet`.')
def serialize_config(params: base_configs.ExperimentConfig,
model_dir: str):
"""Serializes and saves the experiment config."""
params_save_path = os.path.join(model_dir, 'params.yaml')
logging.info('Saving experiment configuration to %s', params_save_path)
tf.io.gfile.makedirs(model_dir)
params_dict.save_params_dict_to_yaml(params, params_save_path)
def train_and_eval(
params: base_configs.ExperimentConfig,
strategy_override: tf.distribute.Strategy) -> Mapping[str, Any]:
"""Runs the train and eval path using compile/fit."""
logging.info('Running train and eval.')
# Note: for TPUs, strategy and scope should be created before the dataset
strategy = strategy_override or distribution_utils.get_distribution_strategy(
distribution_strategy=params.runtime.distribution_strategy,
all_reduce_alg=params.runtime.all_reduce_alg,
num_gpus=params.runtime.num_gpus,
tpu_address=params.runtime.tpu)
strategy_scope = distribution_utils.get_strategy_scope(strategy)
logging.info('Detected %d devices.', strategy.num_replicas_in_sync)
label_smoothing = params.model.loss.label_smoothing
one_hot = label_smoothing and label_smoothing > 0
builders = _get_dataset_builders(params, strategy, one_hot)
datasets = [builder.build() if builder else None for builder in builders]
# Unpack datasets and builders based on train/val/test splits
train_builder, validation_builder, test_builder = builders # pylint: disable=unbalanced-tuple-unpacking
train_dataset, validation_dataset, test_dataset = datasets
train_epochs = params.train.epochs
train_steps = params.train.steps or train_builder.num_steps
validation_steps = params.evaluation.steps or validation_builder.num_steps
logging.info('Global batch size: %d', train_builder.global_batch_size)
with strategy_scope:
model_params = params.model.model_params.as_dict()
model = MODELS[params.model.name](**model_params)
learning_rate = optimizer_factory.build_learning_rate(
params=params.model.learning_rate,
batch_size=train_builder.global_batch_size,
train_steps=train_steps)
optimizer = optimizer_factory.build_optimizer(
optimizer_name=params.model.optimizer.name,
base_learning_rate=learning_rate,
params=params.model.optimizer.as_dict())
metrics_map = _get_metrics(one_hot)
metrics = [metrics_map[metric] for metric in params.train.metrics]
if one_hot:
loss_obj = tf.keras.losses.CategoricalCrossentropy(
label_smoothing=params.model.loss.label_smoothing)
else:
loss_obj = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer=optimizer,
loss=loss_obj,
metrics=metrics,
run_eagerly=params.runtime.enable_eager)
initial_epoch = 0
if params.train.resume_checkpoint:
initial_epoch = resume_from_checkpoint(model=model,
model_dir=params.model_dir,
train_steps=train_steps)
serialize_config(params=params, model_dir=params.model_dir)
# TODO(dankondratyuk): callbacks significantly slow down training
callbacks = custom_callbacks.get_callbacks(
model_checkpoint=params.train.callbacks.enable_checkpoint_and_export,
include_tensorboard=params.train.callbacks.enable_tensorboard,
track_lr=params.train.tensorboard.track_lr,
write_model_weights=params.train.tensorboard.write_model_weights,
initial_step=initial_epoch * train_steps,
model_dir=params.model_dir)
history = model.fit(
train_dataset,
epochs=train_epochs,
steps_per_epoch=train_steps,
initial_epoch=initial_epoch,
callbacks=callbacks,
validation_data=validation_dataset,
validation_steps=validation_steps,
validation_freq=params.evaluation.epochs_between_evals)
validation_output = model.evaluate(
validation_dataset, steps=validation_steps, verbose=2)
# TODO(dankondratyuk): eval and save final test accuracy
stats = common.build_stats(history,
validation_output,
callbacks)
return stats
def export(params: base_configs.ExperimentConfig):
"""Runs the model export functionality."""
logging.info('Exporting model.')
model_params = params.model.model_params.as_dict()
model = MODELS[params.model.name](**model_params)
checkpoint = params.export.checkpoint
if checkpoint is None:
logging.info('No export checkpoint was provided. Using the latest '
'checkpoint from model_dir.')
checkpoint = tf.train.latest_checkpoint(params.model_dir)
model.load_weights(checkpoint)
model.save(params.export.destination)
def run(flags_obj: flags.FlagValues,
        strategy_override: Optional[tf.distribute.Strategy] = None
        ) -> Mapping[str, Any]:
"""Runs Image Classification model using native Keras APIs.
Args:
flags_obj: An object containing parsed flag values.
    strategy_override: An optional `tf.distribute.Strategy` to use for the
      model.
  Returns:
    Dictionary of training/eval stats.
"""
params = _get_params_from_flags(flags_obj)
initialize(params)
if params.mode == 'train_and_eval':
return train_and_eval(params, strategy_override)
elif params.mode == 'export_only':
export(params)
else:
raise ValueError('{} is not a valid mode.'.format(params.mode))
def main(_):
with logger.benchmark_context(flags.FLAGS):
stats = run(flags.FLAGS)
if stats:
logging.info('Run stats:\n%s', stats)
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
define_classifier_flags()
flags.mark_flag_as_required('data_dir')
flags.mark_flag_as_required('mode')
flags.mark_flag_as_required('model_type')
flags.mark_flag_as_required('dataset')
assert tf.version.VERSION.startswith('2.')
app.run(main)
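# Example invocation (illustrative; the paths and dataset name are
# assumptions, and the required flags are marked above):
#   python3 classifier_trainer.py \
#     --mode=train_and_eval \
#     --model_type=resnet \
#     --dataset=imagenet \
#     --data_dir=/tmp/imagenet_data \
#     --model_dir=/tmp/resnet_model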
# Lint as: python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Unit tests for the classifier trainer models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import functools
import json
import os
import sys
from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional, Tuple
from absl import flags
from absl.testing import parameterized
import tensorflow.compat.v2 as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.utils.flags import core as flags_core
from official.vision.image_classification import classifier_trainer
from official.vision.image_classification import dataset_factory
from official.vision.image_classification import test_utils
from official.vision.image_classification.configs import base_configs
classifier_trainer.define_classifier_flags()
def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]:
"""Returns the combinations of end-to-end tests to run."""
return combinations.combine(
distribution=[
strategy_combinations.default_strategy,
strategy_combinations.tpu_strategy,
strategy_combinations.one_device_strategy_gpu,
],
model=[
'efficientnet',
'resnet',
],
mode='eager',
dataset=[
'imagenet',
],
)
def get_params_override(params_override: Mapping[str, Any]) -> str:
"""Converts params_override dict to string command."""
return '--params_override=' + json.dumps(params_override)
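# For example (illustrative only):
#   get_params_override({'train': {'epochs': 1}})
#   # -> '--params_override={"train": {"epochs": 1}}'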
def basic_params_override() -> MutableMapping[str, Any]:
"""Returns a basic parameter configuration for testing."""
return {
'train_dataset': {
'builder': 'synthetic',
'use_per_replica_batch_size': True,
'batch_size': 1,
'image_size': 224,
},
'validation_dataset': {
'builder': 'synthetic',
'batch_size': 1,
'use_per_replica_batch_size': True,
'image_size': 224,
},
'test_dataset': {
'builder': 'synthetic',
'batch_size': 1,
'use_per_replica_batch_size': True,
'image_size': 224,
},
'train': {
'steps': 1,
'epochs': 1,
'callbacks': {
'enable_checkpoint_and_export': True,
'enable_tensorboard': False,
},
},
'evaluation': {
'steps': 1,
},
}
def get_trivial_model(num_classes: int) -> tf.keras.Model:
"""Creates and compiles trivial model for ImageNet dataset."""
model = test_utils.trivial_model(num_classes=num_classes)
lr = 0.01
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)
loss_obj = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer=optimizer,
loss=loss_obj,
run_eagerly=True)
return model
def get_trivial_data() -> tf.data.Dataset:
"""Gets trivial data in the ImageNet size."""
  def generate_data(_) -> Tuple[tf.Tensor, tf.Tensor]:
image = tf.zeros(shape=(224, 224, 3), dtype=tf.float32)
label = tf.zeros([1], dtype=tf.int32)
return image, label
dataset = tf.data.Dataset.range(1)
dataset = dataset.repeat()
dataset = dataset.map(generate_data,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.prefetch(buffer_size=1).batch(1)
return dataset
def run_end_to_end(main: Callable[[Any], None],
extra_flags: Optional[Iterable[str]] = None,
model_dir: Optional[str] = None):
"""Runs the classifier trainer end-to-end."""
extra_flags = [] if extra_flags is None else extra_flags
args = [sys.argv[0], '--model_dir', model_dir] + extra_flags
flags_core.parse_flags(argv=args)
main(flags.FLAGS)
class ClassifierTest(tf.test.TestCase, parameterized.TestCase):
"""Unit tests for Keras models."""
_tempdir = None
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(ClassifierTest, cls).setUpClass()
def tearDown(self):
super(ClassifierTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
@combinations.generate(distribution_strategy_combinations())
def test_end_to_end_train_and_eval_export(self, distribution, model, dataset):
"""Test train_and_eval and export for Keras classifier models."""
    # Some parameters are not defined as flags (e.g. cannot run
    # classifier_trainer.py --batch_size=...) by design, so use
    # "--params_override=..." instead.
model_dir = self.get_temp_dir()
base_flags = [
'--data_dir=not_used',
'--model_type=' + model,
'--dataset=' + dataset,
]
train_and_eval_flags = base_flags + [
get_params_override(basic_params_override()),
'--mode=train_and_eval',
]
export_params = basic_params_override()
export_path = os.path.join(model_dir, 'export')
export_params['export'] = {}
export_params['export']['destination'] = export_path
export_flags = base_flags + [
'--mode=export_only',
get_params_override(export_params)
]
run = functools.partial(classifier_trainer.run,
strategy_override=distribution)
run_end_to_end(main=run,
extra_flags=train_and_eval_flags,
model_dir=model_dir)
run_end_to_end(main=run,
extra_flags=export_flags,
model_dir=model_dir)
self.assertTrue(os.path.exists(export_path))
@combinations.generate(distribution_strategy_combinations())
def test_end_to_end_invalid_mode(self, distribution, model, dataset):
"""Test the Keras EfficientNet model with `strategy`."""
model_dir = self.get_temp_dir()
extra_flags = [
'--data_dir=not_used',
'--mode=invalid_mode',
'--model_type=' + model,
'--dataset=' + dataset,
get_params_override(basic_params_override()),
]
run = functools.partial(classifier_trainer.run,
strategy_override=distribution)
with self.assertRaises(ValueError):
run_end_to_end(main=run, extra_flags=extra_flags, model_dir=model_dir)
class UtilTests(parameterized.TestCase, tf.test.TestCase):
"""Tests for individual utility functions within classifier_trainer.py."""
@parameterized.named_parameters(
('efficientnet-b0', 'efficientnet', 'efficientnet-b0', 224),
('efficientnet-b1', 'efficientnet', 'efficientnet-b1', 240),
('efficientnet-b2', 'efficientnet', 'efficientnet-b2', 260),
('efficientnet-b3', 'efficientnet', 'efficientnet-b3', 300),
('efficientnet-b4', 'efficientnet', 'efficientnet-b4', 380),
('efficientnet-b5', 'efficientnet', 'efficientnet-b5', 456),
('efficientnet-b6', 'efficientnet', 'efficientnet-b6', 528),
('efficientnet-b7', 'efficientnet', 'efficientnet-b7', 600),
('resnet', 'resnet', '', None),
)
  def test_get_model_image_size(self, model, model_name, expected):
config = base_configs.ExperimentConfig(
model_name=model,
model=base_configs.ModelConfig(
model_params={
'model_name': model_name,
},
)
)
size = classifier_trainer.get_image_size_from_model(config)
self.assertEqual(size, expected)
@parameterized.named_parameters(
('dynamic', 'dynamic', None, 'dynamic'),
('scalar', 128., None, 128.),
('float32', None, 'float32', 1),
('float16', None, 'float16', 128),
)
def test_get_loss_scale(self, loss_scale, dtype, expected):
config = base_configs.ExperimentConfig(
model=base_configs.ModelConfig(
loss=base_configs.LossConfig(loss_scale=loss_scale)),
train_dataset=dataset_factory.DatasetConfig(dtype=dtype))
ls = classifier_trainer.get_loss_scale(config, fp16_default=128)
self.assertEqual(ls, expected)
@parameterized.named_parameters(
('float16', 'float16'),
('bfloat16', 'bfloat16')
)
def test_initialize(self, dtype):
config = base_configs.ExperimentConfig(
runtime=base_configs.RuntimeConfig(
enable_eager=False,
enable_xla=False,
gpu_threads_enabled=True,
per_gpu_thread_count=1,
gpu_thread_mode='gpu_private',
num_gpus=1,
dataset_num_private_threads=1,
),
train_dataset=dataset_factory.DatasetConfig(dtype=dtype),
model=base_configs.ModelConfig(
loss=base_configs.LossConfig(loss_scale='dynamic')),
)
classifier_trainer.initialize(config)
def test_resume_from_checkpoint(self):
"""Tests functionality for resuming from checkpoint."""
# Set the keras policy
policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
tf.keras.mixed_precision.experimental.set_policy(policy)
# Get the model, datasets, and compile it.
model = get_trivial_model(10)
# Create the checkpoint
model_dir = self.get_temp_dir()
train_epochs = 1
train_steps = 10
ds = get_trivial_data()
callbacks = [
tf.keras.callbacks.ModelCheckpoint(
os.path.join(model_dir, 'model.ckpt-{epoch:04d}'),
save_weights_only=True)
]
model.fit(
ds,
callbacks=callbacks,
epochs=train_epochs,
steps_per_epoch=train_steps)
# Test load from checkpoint
clean_model = get_trivial_model(10)
weights_before_load = copy.deepcopy(clean_model.get_weights())
initial_epoch = classifier_trainer.resume_from_checkpoint(
model=clean_model,
model_dir=model_dir,
train_steps=train_steps)
self.assertEqual(initial_epoch, 1)
self.assertNotAllClose(weights_before_load, clean_model.get_weights())
tf.io.gfile.rmtree(model_dir)
def test_serialize_config(self):
"""Tests functionality for serializing data."""
config = base_configs.ExperimentConfig()
model_dir = self.get_temp_dir()
classifier_trainer.serialize_config(params=config, model_dir=model_dir)
saved_params_path = os.path.join(model_dir, 'params.yaml')
self.assertTrue(os.path.exists(saved_params_path))
tf.io.gfile.rmtree(model_dir)
if __name__ == '__main__':
assert tf.version.VERSION.startswith('2.')
tf.test.main()