# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Definitions for high level configuration groups.."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from typing import Any, List, Mapping, Optional
import dataclasses
from official.modeling.hyperparams import base_config
CallbacksConfig = base_config.CallbacksConfig
TensorboardConfig = base_config.TensorboardConfig
RuntimeConfig = base_config.RuntimeConfig
@dataclasses.dataclass
class ExportConfig(base_config.Config):
"""Configuration for exports.
Attributes:
checkpoint: the path to the checkpoint to export.
destination: the path to where the checkpoint should be exported.
"""
checkpoint: str = None
destination: str = None
@dataclasses.dataclass
class MetricsConfig(base_config.Config):
"""Configuration for Metrics.
Attributes:
    accuracy: Whether or not to track accuracy as a metric. Defaults to None.
    top_5: Whether or not to track top_5_accuracy as a metric. Defaults to
      None.
"""
accuracy: bool = None
top_5: bool = None
@dataclasses.dataclass
class TrainConfig(base_config.Config):
"""Configuration for training.
Attributes:
    resume_checkpoint: Whether or not to resume training from a checkpoint.
Defaults to None.
epochs: The number of training epochs to run. Defaults to None.
steps: The number of steps to run per epoch. If None, then this will be
inferred based on the number of images and batch size. Defaults to None.
callbacks: An instance of CallbacksConfig.
    metrics: A list of metric names to track, e.g. ['accuracy', 'top_5'].
      Defaults to None.
tensorboard: An instance of TensorboardConfig.
"""
resume_checkpoint: bool = None
epochs: int = None
steps: int = None
callbacks: CallbacksConfig = CallbacksConfig()
metrics: List[str] = None
tensorboard: TensorboardConfig = TensorboardConfig()
@dataclasses.dataclass
class EvalConfig(base_config.Config):
"""Configuration for evaluation.
Attributes:
epochs_between_evals: The number of train epochs to run between evaluations.
Defaults to None.
steps: The number of eval steps to run during evaluation. If None, this will
be inferred based on the number of images and batch size. Defaults to
None.
"""
epochs_between_evals: int = None
steps: int = None
@dataclasses.dataclass
class LossConfig(base_config.Config):
"""Configuration for Loss.
Attributes:
name: The name of the loss. Defaults to None.
    loss_scale: The type of loss scale to use. Defaults to None.
    label_smoothing: The amount of label smoothing to apply to the loss. This
      only applies to 'categorical_cross_entropy'.
"""
name: str = None
loss_scale: str = None
label_smoothing: float = None
@dataclasses.dataclass
class OptimizerConfig(base_config.Config):
"""Configuration for Optimizers.
Attributes:
name: The name of the optimizer. Defaults to None.
decay: Decay or rho, discounting factor for gradient. Defaults to None.
epsilon: Small value used to avoid 0 denominator. Defaults to None.
momentum: Plain momentum constant. Defaults to None.
nesterov: Whether or not to apply Nesterov momentum. Defaults to None.
moving_average_decay: The amount of decay to apply. If 0 or None, then
exponential moving average is not used. Defaults to None.
lookahead: Whether or not to apply the lookahead optimizer. Defaults to
None.
beta_1: The exponential decay rate for the 1st moment estimates. Used in
the Adam optimizers. Defaults to None.
beta_2: The exponential decay rate for the 2nd moment estimates. Used in
the Adam optimizers. Defaults to None.
"""
name: str = None
decay: float = None
epsilon: float = None
momentum: float = None
nesterov: bool = None
moving_average_decay: Optional[float] = None
lookahead: Optional[bool] = None
beta_1: float = None
beta_2: float = None
@dataclasses.dataclass
class LearningRateConfig(base_config.Config):
"""Configuration for learning rates.
Attributes:
name: The name of the learning rate. Defaults to None.
initial_lr: The initial learning rate. Defaults to None.
decay_epochs: The number of decay epochs. Defaults to None.
decay_rate: The rate of decay. Defaults to None.
warmup_epochs: The number of warmup epochs. Defaults to None.
examples_per_epoch: the number of examples in a single epoch.
Defaults to None.
boundaries: boundaries used in piecewise constant decay with warmup.
multipliers: multipliers used in piecewise constant decay with warmup.
scale_by_batch_size: Scale the learning rate by a fraction of the batch
size. Set to 0 for no scaling (default).
"""
name: str = None
initial_lr: float = None
decay_epochs: float = None
decay_rate: float = None
warmup_epochs: int = None
examples_per_epoch: int = None
boundaries: List[int] = None
multipliers: List[float] = None
scale_by_batch_size: float = 0.
@dataclasses.dataclass
class ModelConfig(base_config.Config):
"""Configuration for Models.
Attributes:
name: The name of the model. Defaults to None.
model_params: The parameters used to create the model. Defaults to None.
num_classes: The number of classes in the model. Defaults to None.
loss: A `LossConfig` instance. Defaults to None.
optimizer: An `OptimizerConfig` instance. Defaults to None.
"""
name: str = None
model_params: Mapping[str, Any] = None
num_classes: int = None
loss: LossConfig = None
optimizer: OptimizerConfig = None
@dataclasses.dataclass
class ExperimentConfig(base_config.Config):
"""Base configuration for an image classification experiment.
Attributes:
    model_dir: The directory to use when running an experiment.
    model_name: The name of the model to run. Defaults to None.
    mode: e.g. 'train_and_eval', 'export'.
    runtime: A `RuntimeConfig` instance.
    train_dataset: The configuration for the training dataset.
    validation_dataset: The configuration for the validation dataset.
    test_dataset: The configuration for the test dataset.
    train: A `TrainConfig` instance.
    evaluation: An `EvalConfig` instance.
    model: A `ModelConfig` instance.
    export: An `ExportConfig` instance.
"""
model_dir: str = None
model_name: str = None
mode: str = None
runtime: RuntimeConfig = None
train_dataset: Any = None
validation_dataset: Any = None
test_dataset: Any = None
train: TrainConfig = None
evaluation: EvalConfig = None
model: ModelConfig = None
export: ExportConfig = None
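# A minimal usage sketch for the config classes above (illustrative only; the
# model_dir value is a hypothetical path):
#
#   config = ExperimentConfig(
#       model_dir='/tmp/experiment',
#       mode='train_and_eval',
#       train=TrainConfig(resume_checkpoint=True, epochs=90),
#       evaluation=EvalConfig(epochs_between_evals=1),
#       model=ModelConfig(name='resnet', num_classes=1000))
#
#   # Config subclasses support `replace` for overrides, as used elsewhere in
#   # this code base:
#   config = config.replace(mode='export')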
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Configuration utils for image classification experiments."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dataclasses
from official.vision.image_classification import dataset_factory
from official.vision.image_classification.configs import base_configs
from official.vision.image_classification.efficientnet import efficientnet_config
from official.vision.image_classification.resnet import resnet_config
@dataclasses.dataclass
class EfficientNetImageNetConfig(base_configs.ExperimentConfig):
"""Base configuration to train efficientnet-b0 on ImageNet.
Attributes:
export: An `ExportConfig` instance.
runtime: A `RuntimeConfig` instance.
train_dataset: A `DatasetConfig` instance for the training data.
validation_dataset: A `DatasetConfig` instance for the validation data.
test_dataset: A `DatasetConfig` instance for the test data.
train: A `TrainConfig` instance.
evaluation: An `EvalConfig` instance.
model: A `ModelConfig` instance.
"""
export: base_configs.ExportConfig = base_configs.ExportConfig()
runtime: base_configs.RuntimeConfig = base_configs.RuntimeConfig()
train_dataset: dataset_factory.DatasetConfig = \
dataset_factory.ImageNetConfig(split='train')
validation_dataset: dataset_factory.DatasetConfig = \
dataset_factory.ImageNetConfig(split='validation')
test_dataset: dataset_factory.DatasetConfig = \
dataset_factory.ImageNetConfig(split='validation')
train: base_configs.TrainConfig = base_configs.TrainConfig(
resume_checkpoint=True,
epochs=500,
steps=None,
callbacks=base_configs.CallbacksConfig(enable_checkpoint_and_export=True,
enable_tensorboard=True),
metrics=['accuracy', 'top_5'],
tensorboard=base_configs.TensorboardConfig(track_lr=True,
write_model_weights=False))
evaluation: base_configs.EvalConfig = base_configs.EvalConfig(
epochs_between_evals=1,
steps=None)
model: base_configs.ModelConfig = \
efficientnet_config.EfficientNetModelConfig()
@dataclasses.dataclass
class ResNetImagenetConfig(base_configs.ExperimentConfig):
"""Base configuration to train resnet-50 on ImageNet."""
export: base_configs.ExportConfig = base_configs.ExportConfig()
runtime: base_configs.RuntimeConfig = base_configs.RuntimeConfig()
train_dataset: dataset_factory.DatasetConfig = \
dataset_factory.ImageNetConfig(split='train',
one_hot=False,
mean_subtract=True,
standardize=True)
validation_dataset: dataset_factory.DatasetConfig = \
dataset_factory.ImageNetConfig(split='validation',
one_hot=False,
mean_subtract=True,
standardize=True)
test_dataset: dataset_factory.DatasetConfig = \
dataset_factory.ImageNetConfig(split='validation',
one_hot=False,
mean_subtract=True,
standardize=True)
train: base_configs.TrainConfig = base_configs.TrainConfig(
resume_checkpoint=True,
epochs=90,
steps=None,
callbacks=base_configs.CallbacksConfig(enable_checkpoint_and_export=True,
enable_tensorboard=True),
metrics=['accuracy', 'top_5'],
tensorboard=base_configs.TensorboardConfig(track_lr=True,
write_model_weights=False))
evaluation: base_configs.EvalConfig = base_configs.EvalConfig(
epochs_between_evals=1,
steps=None)
model: base_configs.ModelConfig = resnet_config.ResNetModelConfig()
def get_config(model: str, dataset: str) -> base_configs.ExperimentConfig:
"""Given model and dataset names, return the ExperimentConfig."""
dataset_model_config_map = {
'imagenet': {
'efficientnet': EfficientNetImageNetConfig(),
'resnet': ResNetImagenetConfig(),
}
}
try:
return dataset_model_config_map[dataset][model]
except KeyError:
if dataset not in dataset_model_config_map:
raise KeyError('Invalid dataset received. Received: {}. Supported '
'datasets include: {}'.format(
dataset,
', '.join(dataset_model_config_map.keys())))
raise KeyError('Invalid model received. Received: {}. Supported models for '
'{} include: {}'.format(
model,
dataset,
', '.join(dataset_model_config_map[dataset].keys())))
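# Usage sketch for `get_config` (the override shown is illustrative):
#
#   config = get_config(model='efficientnet', dataset='imagenet')
#   config = config.replace(model_dir='/tmp/experiment')  # hypothetical path
#
# Requesting an unsupported pair raises a KeyError listing the valid options,
# e.g. get_config(model='vgg', dataset='imagenet') names 'efficientnet' and
# 'resnet' as the supported models for 'imagenet'.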
# Training configuration for EfficientNet-b0 trained on ImageNet on GPUs.
# Takes ~32 minutes per epoch for 8 V100s.
# Reaches ~76.1% within 350 epochs.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
  model_dir: null
  mode: 'train_and_eval'
  distribution_strategy: 'mirrored'
  num_gpus: 1
train_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'train'
  num_classes: 1000
  num_examples: 1281167
  batch_size: 32
  use_per_replica_batch_size: True
  dtype: 'float32'
  augmenter:
    name: 'autoaugment'
validation_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'validation'
  num_classes: 1000
  num_examples: 50000
  batch_size: 32
  use_per_replica_batch_size: True
  dtype: 'float32'
model:
  model_params:
    model_name: 'efficientnet-b0'
    overrides:
      num_classes: 1000
      batch_norm: 'default'
      dtype: 'float32'
  optimizer:
    name: 'rmsprop'
    momentum: 0.9
    decay: 0.9
  learning_rate:
    name: 'exponential'
  loss:
    label_smoothing: 0.1
train:
  resume_checkpoint: True
  epochs: 500
evaluation:
  epochs_between_evals: 1
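# Worked example of the batch size math above (an illustration, not part of
# the config): with `use_per_replica_batch_size: True` and `batch_size: 32`,
# the effective global batch size is 32 * num_devices, e.g. 32 * 8 = 256 on
# 8 GPUs, which gives 1281167 // 256 = 5004 training steps per epoch.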
# Training configuration for EfficientNet-b0 trained on ImageNet on TPUs.
# Takes ~2 minutes, 50 seconds per epoch for v3-32.
# Reaches ~76.1% within 350 epochs.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
  model_dir: null
  mode: 'train_and_eval'
  distribution_strategy: 'tpu'
train_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'train'
  num_classes: 1000
  num_examples: 1281167
  batch_size: 128
  use_per_replica_batch_size: True
  dtype: 'bfloat16'
  augmenter:
    name: 'autoaugment'
validation_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'validation'
  num_classes: 1000
  num_examples: 50000
  batch_size: 128
  use_per_replica_batch_size: True
  dtype: 'bfloat16'
model:
  model_params:
    model_name: 'efficientnet-b0'
    overrides:
      num_classes: 1000
      batch_norm: 'tpu'
      dtype: 'bfloat16'
  optimizer:
    name: 'rmsprop'
    momentum: 0.9
    decay: 0.9
    moving_average_decay: 0.
    lookahead: false
  learning_rate:
    name: 'exponential'
  loss:
    label_smoothing: 0.1
train:
  resume_checkpoint: True
  epochs: 500
evaluation:
  epochs_between_evals: 1
# Training configuration for EfficientNet-b1 trained on ImageNet on GPUs.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
  model_dir: null
  mode: 'train_and_eval'
  distribution_strategy: 'mirrored'
  num_gpus: 1
train_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'train'
  num_classes: 1000
  num_examples: 1281167
  batch_size: 32
  dtype: 'float32'
validation_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'validation'
  num_classes: 1000
  num_examples: 50000
  batch_size: 32
  dtype: 'float32'
model:
  model_params:
    model_name: 'efficientnet-b1'
    overrides:
      num_classes: 1000
      batch_norm: 'default'
      dtype: 'float32'
  optimizer:
    name: 'rmsprop'
    momentum: 0.9
    decay: 0.9
  learning_rate:
    name: 'exponential'
  loss:
    label_smoothing: 0.1
train:
  resume_checkpoint: True
  epochs: 500
evaluation:
  epochs_between_evals: 1
# Training configuration for EfficientNet-b1 trained on ImageNet on TPUs.
# Takes ~3 minutes, 15 seconds per epoch for v3-32.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
  model_dir: null
  mode: 'train_and_eval'
  distribution_strategy: 'tpu'
train_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'train'
  num_classes: 1000
  num_examples: 1281167
  batch_size: 128
  use_per_replica_batch_size: True
  dtype: 'bfloat16'
  augmenter:
    name: 'autoaugment'
validation_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'validation'
  num_classes: 1000
  num_examples: 50000
  batch_size: 128
  use_per_replica_batch_size: True
  dtype: 'bfloat16'
model:
  model_params:
    model_name: 'efficientnet-b1'
    overrides:
      num_classes: 1000
      batch_norm: 'tpu'
      dtype: 'bfloat16'
  optimizer:
    name: 'rmsprop'
    momentum: 0.9
    decay: 0.9
  learning_rate:
    name: 'exponential'
  loss:
    label_smoothing: 0.1
train:
  resume_checkpoint: True
  epochs: 500
evaluation:
  epochs_between_evals: 1
# Training configuration for ResNet trained on ImageNet on GPUs.
# Takes ~3 minutes, 15 seconds per epoch for 8 V100s.
# Reaches ~76.1% within 90 epochs.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
  model_dir: null
  mode: 'train_and_eval'
  distribution_strategy: 'mirrored'
  num_gpus: 1
train_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'train'
  image_size: 224
  num_classes: 1000
  num_examples: 1281167
  batch_size: 128
  use_per_replica_batch_size: True
  dtype: 'float32'
  mean_subtract: True
  standardize: True
validation_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'validation'
  image_size: 224
  num_classes: 1000
  num_examples: 50000
  batch_size: 128
  use_per_replica_batch_size: True
  dtype: 'float32'
  mean_subtract: True
  standardize: True
model:
  model_name: 'resnet'
  model_params:
    rescale_inputs: False
  optimizer:
    name: 'momentum'
    momentum: 0.9
    decay: 0.9
    epsilon: 0.001
  learning_rate:
    name: 'piecewise_constant_with_warmup'
  loss:
    label_smoothing: 0.1
train:
  resume_checkpoint: True
  epochs: 90
evaluation:
  epochs_between_evals: 1
# Training configuration for ResNet trained on ImageNet on TPUs.
# Takes ~2 minutes, 43 seconds per epoch for a v3-32.
# Reaches ~76.1% within 90 epochs.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
  model_dir: null
  mode: 'train_and_eval'
  distribution_strategy: 'tpu'
train_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'train'
  one_hot: False
  image_size: 224
  num_classes: 1000
  num_examples: 1281167
  batch_size: 128
  use_per_replica_batch_size: True
  mean_subtract: True
  standardize: True
  dtype: 'bfloat16'
validation_dataset:
  name: 'imagenet2012'
  data_dir: null
  builder: 'records'
  split: 'validation'
  one_hot: False
  image_size: 224
  num_classes: 1000
  num_examples: 50000
  batch_size: 128
  use_per_replica_batch_size: True
  mean_subtract: True
  standardize: True
  dtype: 'bfloat16'
model:
  model_name: 'resnet'
  model_params:
    rescale_inputs: False
  optimizer:
    name: 'momentum'
    momentum: 0.9
    decay: 0.9
    epsilon: 0.001
    moving_average_decay: 0.
    lookahead: false
  learning_rate:
    name: 'piecewise_constant_with_warmup'
  loss:
    label_smoothing: 0.1
train:
  callbacks:
    enable_checkpoint_and_export: True
  resume_checkpoint: True
  epochs: 90
evaluation:
  epochs_between_evals: 1
# Lint as: python3
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Dataset utilities for vision tasks using TFDS and tf.data.Dataset."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import os
from typing import Any, List, Optional, Tuple, Mapping, Union
from absl import logging
from dataclasses import dataclass
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
from official.modeling.hyperparams import base_config
from official.vision.image_classification import augment
from official.vision.image_classification import preprocessing
AUGMENTERS = {
'autoaugment': augment.AutoAugment,
'randaugment': augment.RandAugment,
}
@dataclass
class AugmentConfig(base_config.Config):
"""Configuration for image augmenters.
Attributes:
name: The name of the image augmentation to use. Possible options are
None (default), 'autoaugment', or 'randaugment'.
    params: Any parameters used to initialize the augmenter.
"""
name: Optional[str] = None
params: Optional[Mapping[str, Any]] = None
def build(self) -> augment.ImageAugment:
"""Build the augmenter using this config."""
params = self.params or {}
augmenter = AUGMENTERS.get(self.name, None)
return augmenter(**params) if augmenter is not None else None
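# Example usage (a sketch; valid `params` keys depend on the constructor of
# the chosen augmenter and are not spelled out here):
#
#   augmenter = AugmentConfig(name='randaugment').build()  # an ImageAugment
#   no_op = AugmentConfig().build()                        # returns None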
@dataclass
class DatasetConfig(base_config.Config):
"""The base configuration for building datasets.
Attributes:
name: The name of the Dataset. Usually should correspond to a TFDS dataset.
data_dir: The path where the dataset files are stored, if available.
filenames: Optional list of strings representing the TFRecord names.
builder: The builder type used to load the dataset. Value should be one of
'tfds' (load using TFDS), 'records' (load from TFRecords), or 'synthetic'
(generate dummy synthetic data without reading from files).
split: The split of the dataset. Usually 'train', 'validation', or 'test'.
image_size: The size of the image in the dataset. This assumes that
`width` == `height`. Set to 'infer' to infer the image size from TFDS
info. This requires `name` to be a registered dataset in TFDS.
    num_classes: The number of classes given by the dataset. Set to 'infer'
      to infer the number of classes from TFDS info. This requires `name` to
      be a registered dataset in TFDS.
    num_channels: The number of channels given by the dataset. Set to 'infer'
      to infer the number of channels from TFDS info. This requires `name` to
      be a registered dataset in TFDS.
    num_examples: The number of examples given by the dataset. Set to 'infer'
      to infer the number of examples from TFDS info. This requires `name` to
      be a registered dataset in TFDS.
batch_size: The base batch size for the dataset.
use_per_replica_batch_size: Whether to scale the batch size based on
available resources. If set to `True`, the dataset builder will return
batch_size multiplied by `num_devices`, the number of device replicas
(e.g., the number of GPUs or TPU cores).
num_devices: The number of replica devices to use. This should be set by
`strategy.num_replicas_in_sync` when using a distribution strategy.
data_format: The data format of the images. Should be 'channels_last' or
'channels_first'.
dtype: The desired dtype of the dataset. This will be set during
preprocessing.
one_hot: Whether to apply one hot encoding. Set to `True` to be able to use
label smoothing.
augmenter: The augmenter config to use. No augmentation is used by default.
download: Whether to download data using TFDS.
shuffle_buffer_size: The buffer size used for shuffling training data.
file_shuffle_buffer_size: The buffer size used for shuffling raw training
files.
skip_decoding: Whether to skip image decoding when loading from TFDS.
    deterministic_train: Whether the examples in the training set should be
      output in a deterministic order.
use_slack: whether to introduce slack in the last prefetch. This may reduce
CPU contention at the start of a training step.
    cache: whether to cache dataset examples. Can be used to avoid re-reading
      from disk on the second epoch. Requires significant memory overhead.
mean_subtract: whether or not to apply mean subtraction to the dataset.
standardize: whether or not to apply standardization to the dataset.
"""
name: Optional[str] = None
data_dir: Optional[str] = None
filenames: Optional[List[str]] = None
builder: str = 'tfds'
split: str = 'train'
image_size: Union[int, str] = 'infer'
num_classes: Union[int, str] = 'infer'
num_channels: Union[int, str] = 'infer'
num_examples: Union[int, str] = 'infer'
batch_size: int = 128
use_per_replica_batch_size: bool = False
num_devices: int = 1
data_format: str = 'channels_last'
dtype: str = 'float32'
one_hot: bool = True
augmenter: AugmentConfig = AugmentConfig()
download: bool = False
shuffle_buffer_size: int = 10000
file_shuffle_buffer_size: int = 1024
skip_decoding: bool = True
deterministic_train: bool = False
use_slack: bool = True
cache: bool = False
mean_subtract: bool = False
standardize: bool = False
@property
def has_data(self):
"""Whether this dataset is has any data associated with it."""
return self.name or self.data_dir or self.filenames
@dataclass
class ImageNetConfig(DatasetConfig):
"""The base ImageNet dataset config."""
name: str = 'imagenet2012'
# Note: for large datasets like ImageNet, using records is faster than tfds
builder: str = 'records'
image_size: int = 224
batch_size: int = 128
@dataclass
class Cifar10Config(DatasetConfig):
"""The base CIFAR-10 dataset config."""
name: str = 'cifar10'
image_size: int = 224
batch_size: int = 128
download: bool = True
cache: bool = True
class DatasetBuilder:
"""An object for building datasets.
Allows building various pipelines fetching examples, preprocessing, etc.
Maintains additional state information calculated from the dataset, i.e.,
training set split, batch size, and number of steps (batches).
"""
def __init__(self, config: DatasetConfig, **overrides: Any):
"""Initialize the builder from the config."""
self.config = config.replace(**overrides)
self.builder_info = None
if self.config.augmenter is not None:
logging.info('Using augmentation: %s', self.config.augmenter.name)
self.augmenter = self.config.augmenter.build()
else:
self.augmenter = None
@property
def is_training(self) -> bool:
"""Whether this is the training set."""
return self.config.split == 'train'
@property
def batch_size(self) -> int:
"""The batch size, multiplied by the number of replicas (if configured)."""
if self.config.use_per_replica_batch_size:
return self.global_batch_size
else:
return self.config.batch_size
@property
def global_batch_size(self):
"""The global batch size across all replicas."""
return self.config.batch_size * self.config.num_devices
@property
def num_steps(self) -> int:
"""The number of steps (batches) to exhaust this dataset."""
# Always divide by the global batch size to get the correct # of steps
return self.num_examples // self.global_batch_size
@property
def image_size(self) -> int:
"""The size of each image (can be inferred from the dataset)."""
if self.config.image_size == 'infer':
return self.info.features['image'].shape[0]
else:
return int(self.config.image_size)
@property
def num_channels(self) -> int:
"""The number of image channels (can be inferred from the dataset)."""
if self.config.num_channels == 'infer':
return self.info.features['image'].shape[-1]
else:
return int(self.config.num_channels)
@property
def num_examples(self) -> int:
"""The number of examples (can be inferred from the dataset)."""
if self.config.num_examples == 'infer':
return self.info.splits[self.config.split].num_examples
else:
return int(self.config.num_examples)
@property
def num_classes(self) -> int:
"""The number of classes (can be inferred from the dataset)."""
if self.config.num_classes == 'infer':
return self.info.features['label'].num_classes
else:
return int(self.config.num_classes)
@property
def info(self) -> tfds.core.DatasetInfo:
"""The TFDS dataset info, if available."""
if self.builder_info is None:
self.builder_info = tfds.builder(self.config.name).info
return self.builder_info
def build(self, input_context: tf.distribute.InputContext = None
) -> tf.data.Dataset:
"""Construct a dataset end-to-end and return it.
Args:
input_context: An optional context provided by `tf.distribute` for
cross-replica training. This isn't necessary if using Keras
compile/fit.
Returns:
A TensorFlow dataset outputting batched images and labels.
"""
builders = {
'tfds': self.load_tfds,
'records': self.load_records,
'synthetic': self.load_synthetic,
}
builder = builders.get(self.config.builder, None)
if builder is None:
raise ValueError('Unknown builder type {}'.format(self.config.builder))
dataset = builder()
dataset = self.pipeline(dataset, input_context)
return dataset
def load_tfds(self) -> tf.data.Dataset:
"""Return a dataset loading files from TFDS."""
logging.info('Using TFDS to load data.')
builder = tfds.builder(self.config.name,
data_dir=self.config.data_dir)
if self.config.download:
builder.download_and_prepare()
decoders = {}
if self.config.skip_decoding:
decoders['image'] = tfds.decode.SkipDecoding()
read_config = tfds.ReadConfig(
interleave_parallel_reads=64,
interleave_block_length=1)
dataset = builder.as_dataset(
split=self.config.split,
as_supervised=True,
shuffle_files=True,
decoders=decoders,
read_config=read_config)
return dataset
def load_records(self) -> tf.data.Dataset:
"""Return a dataset loading files with TFRecords."""
logging.info('Using TFRecords to load data.')
if self.config.filenames is None:
if self.config.data_dir is None:
raise ValueError('Dataset must specify a path for the data files.')
file_pattern = os.path.join(self.config.data_dir,
'{}*'.format(self.config.split))
dataset = tf.data.Dataset.list_files(file_pattern, shuffle=True)
else:
dataset = tf.data.Dataset.from_tensor_slices(self.config.filenames)
if self.is_training:
# Shuffle the input files.
      dataset = dataset.shuffle(
          buffer_size=self.config.file_shuffle_buffer_size)
return dataset
def load_synthetic(self) -> tf.data.Dataset:
"""Return a dataset generating dummy synthetic data."""
logging.info('Generating a synthetic dataset.')
def generate_data(_):
image = tf.zeros([self.image_size, self.image_size, self.num_channels],
dtype=self.config.dtype)
label = tf.zeros([1], dtype=tf.int32)
return image, label
dataset = tf.data.Dataset.range(1)
dataset = dataset.repeat()
dataset = dataset.map(generate_data,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
return dataset
def pipeline(self,
dataset: tf.data.Dataset,
input_context: tf.distribute.InputContext = None
) -> tf.data.Dataset:
"""Build a pipeline fetching, shuffling, and preprocessing the dataset.
Args:
dataset: A `tf.data.Dataset` that loads raw files.
input_context: An optional context provided by `tf.distribute` for
cross-replica training. This isn't necessary if using Keras
compile/fit.
Returns:
A TensorFlow dataset outputting batched images and labels.
"""
if input_context and input_context.num_input_pipelines > 1:
dataset = dataset.shard(input_context.num_input_pipelines,
input_context.input_pipeline_id)
if self.is_training and not self.config.cache:
dataset = dataset.repeat()
if self.config.builder == 'records':
# Read the data from disk in parallel
buffer_size = 8 * 1024 * 1024 # Use 8 MiB per file
dataset = dataset.interleave(
lambda name: tf.data.TFRecordDataset(name, buffer_size=buffer_size),
cycle_length=16,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.prefetch(self.global_batch_size)
if self.config.cache:
dataset = dataset.cache()
if self.is_training:
dataset = dataset.shuffle(self.config.shuffle_buffer_size)
dataset = dataset.repeat()
# Parse, pre-process, and batch the data in parallel
if self.config.builder == 'records':
preprocess = self.parse_record
else:
preprocess = self.preprocess
dataset = dataset.map(preprocess,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(self.batch_size, drop_remainder=self.is_training)
# Note: we could do image normalization here, but we defer it to the model
# which can perform it much faster on a GPU/TPU
# TODO(dankondratyuk): if we fix prefetching, we can do it here
if self.is_training and self.config.deterministic_train is not None:
options = tf.data.Options()
options.experimental_deterministic = self.config.deterministic_train
options.experimental_slack = self.config.use_slack
options.experimental_optimization.parallel_batch = True
options.experimental_optimization.map_fusion = True
options.experimental_optimization.map_vectorization.enabled = True
options.experimental_optimization.map_parallelization = True
dataset = dataset.with_options(options)
# Prefetch overlaps in-feed with training
# Note: autotune here is not recommended, as this can lead to memory leaks.
    # Instead, use a constant prefetch size like the number of devices.
dataset = dataset.prefetch(self.config.num_devices)
return dataset
def parse_record(self, record: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
"""Parse an ImageNet record from a serialized string Tensor."""
keys_to_features = {
'image/encoded':
tf.io.FixedLenFeature((), tf.string, ''),
'image/format':
tf.io.FixedLenFeature((), tf.string, 'jpeg'),
'image/class/label':
tf.io.FixedLenFeature([], tf.int64, -1),
'image/class/text':
tf.io.FixedLenFeature([], tf.string, ''),
'image/object/bbox/xmin':
tf.io.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymin':
tf.io.VarLenFeature(dtype=tf.float32),
'image/object/bbox/xmax':
tf.io.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymax':
tf.io.VarLenFeature(dtype=tf.float32),
'image/object/class/label':
tf.io.VarLenFeature(dtype=tf.int64),
}
parsed = tf.io.parse_single_example(record, keys_to_features)
label = tf.reshape(parsed['image/class/label'], shape=[1])
label = tf.cast(label, dtype=tf.int32)
# Subtract one so that labels are in [0, 1000)
label -= 1
image_bytes = tf.reshape(parsed['image/encoded'], shape=[])
image, label = self.preprocess(image_bytes, label)
return image, label
def preprocess(self, image: tf.Tensor, label: tf.Tensor
) -> Tuple[tf.Tensor, tf.Tensor]:
"""Apply image preprocessing and augmentation to the image and label."""
if self.is_training:
image = preprocessing.preprocess_for_train(
image,
image_size=self.image_size,
mean_subtract=self.config.mean_subtract,
standardize=self.config.standardize,
dtype=self.config.dtype,
augmenter=self.augmenter)
else:
image = preprocessing.preprocess_for_eval(
image,
image_size=self.image_size,
num_channels=self.num_channels,
mean_subtract=self.config.mean_subtract,
standardize=self.config.standardize,
dtype=self.config.dtype)
label = tf.cast(label, tf.int32)
if self.config.one_hot:
label = tf.one_hot(label, self.num_classes)
label = tf.reshape(label, [self.num_classes])
return image, label
@classmethod
def from_params(cls, *args, **kwargs):
"""Construct a dataset builder from a default config and any overrides."""
config = DatasetConfig.from_args(*args, **kwargs)
return cls(config)
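# A minimal usage sketch for `DatasetBuilder` (the data_dir is a hypothetical
# path; keyword overrides are applied via `config.replace`):
#
#   config = ImageNetConfig(split='train', data_dir='/data/imagenet')
#   builder = DatasetBuilder(config, batch_size=64, num_devices=8)
#   dataset = builder.build()  # batched (image, label) tf.data.Dataset
#   steps = builder.num_steps  # num_examples // (64 * 8)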
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common modeling utilities."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import numpy as np
import tensorflow.compat.v1 as tf1
import tensorflow.compat.v2 as tf
from typing import Text, Optional
from tensorflow.python.tpu import tpu_function
@tf.keras.utils.register_keras_serializable(package='Text')
class TpuBatchNormalization(tf.keras.layers.BatchNormalization):
"""Cross replica batch normalization."""
def __init__(self, fused: Optional[bool] = False, **kwargs):
if fused in (True, None):
raise ValueError('TpuBatchNormalization does not support fused=True.')
super(TpuBatchNormalization, self).__init__(fused=fused, **kwargs)
def _cross_replica_average(self, t: tf.Tensor, num_shards_per_group: int):
"""Calculates the average value of input tensor across TPU replicas."""
num_shards = tpu_function.get_tpu_context().number_of_shards
group_assignment = None
if num_shards_per_group > 1:
if num_shards % num_shards_per_group != 0:
raise ValueError(
'num_shards: %d mod shards_per_group: %d, should be 0' %
(num_shards, num_shards_per_group))
num_groups = num_shards // num_shards_per_group
group_assignment = [[
x for x in range(num_shards) if x // num_shards_per_group == y
] for y in range(num_groups)]
return tf1.tpu.cross_replica_sum(t, group_assignment) / tf.cast(
num_shards_per_group, t.dtype)
  def _moments(self, inputs: tf.Tensor, reduction_axes, keep_dims):
"""Compute the mean and variance: it overrides the original _moments."""
shard_mean, shard_variance = super(TpuBatchNormalization, self)._moments(
inputs, reduction_axes, keep_dims=keep_dims)
num_shards = tpu_function.get_tpu_context().number_of_shards or 1
if num_shards <= 8: # Skip cross_replica for 2x2 or smaller slices.
num_shards_per_group = 1
else:
num_shards_per_group = max(8, num_shards // 8)
if num_shards_per_group > 1:
# Compute variance using: Var[X]= E[X^2] - E[X]^2.
shard_square_of_mean = tf.math.square(shard_mean)
shard_mean_of_square = shard_variance + shard_square_of_mean
group_mean = self._cross_replica_average(shard_mean, num_shards_per_group)
group_mean_of_square = self._cross_replica_average(
shard_mean_of_square, num_shards_per_group)
group_variance = group_mean_of_square - tf.math.square(group_mean)
return (group_mean, group_variance)
else:
return (shard_mean, shard_variance)
def get_batch_norm(batch_norm_type: Text) -> tf.keras.layers.BatchNormalization:
"""A helper to create a batch normalization getter.
Args:
batch_norm_type: The type of batch normalization layer implementation. `tpu`
will use `TpuBatchNormalization`.
Returns:
    A batch normalization layer class: `TpuBatchNormalization` if
    `batch_norm_type` is 'tpu', else `tf.keras.layers.BatchNormalization`.
"""
if batch_norm_type == 'tpu':
return TpuBatchNormalization
return tf.keras.layers.BatchNormalization
def count_params(model, trainable_only=True):
"""Returns the count of all model parameters, or just trainable ones."""
if not trainable_only:
return model.count_params()
else:
return int(np.sum([tf.keras.backend.count_params(p)
for p in model.trainable_weights]))
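# Usage sketch: `get_batch_norm` returns a layer *class*, not an instance, so
# it is instantiated like any Keras batch normalization layer:
#
#   batch_norm = get_batch_norm('tpu')  # TpuBatchNormalization
#   layer = batch_norm(axis=-1, momentum=0.99, epsilon=1e-3)
#
# Worked example of the grouping in `_moments`: for num_shards=32, it uses
# num_shards_per_group = max(8, 32 // 8) = 8, i.e. 4 groups of 8 replicas.
#
#   num_trainable = count_params(model)  # trainable parameters only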
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Configuration definitions for EfficientNet losses, learning rates, and optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from typing import Any, Mapping
import dataclasses
from official.vision.image_classification.configs import base_configs
@dataclasses.dataclass
class EfficientNetModelConfig(base_configs.ModelConfig):
"""Configuration for the EfficientNet model.
This configuration will default to settings used for training efficientnet-b0
on a v3-8 TPU on ImageNet.
Attributes:
name: The name of the model. Defaults to 'EfficientNet'.
num_classes: The number of classes in the model.
model_params: A dictionary that represents the parameters of the
EfficientNet model. These will be passed in to the "from_name" function.
loss: The configuration for loss. Defaults to a categorical cross entropy
implementation.
    optimizer: The configuration for the optimizer. Defaults to an RMSProp
configuration.
learning_rate: The configuration for learning rate. Defaults to an
exponential configuration.
"""
name: str = 'EfficientNet'
num_classes: int = 1000
model_params: Mapping[str, Any] = dataclasses.field(default_factory=lambda: {
'model_name': 'efficientnet-b0',
'model_weights_path': '',
'copy_to_local': False,
'overrides': {
'batch_norm': 'default',
'rescale_input': True,
'num_classes': 1000,
}
})
loss: base_configs.LossConfig = base_configs.LossConfig(
name='categorical_crossentropy',
label_smoothing=0.1)
optimizer: base_configs.OptimizerConfig = base_configs.OptimizerConfig(
name='rmsprop',
decay=0.9,
epsilon=0.001,
momentum=0.9,
moving_average_decay=None)
learning_rate: base_configs.LearningRateConfig = base_configs.LearningRateConfig( # pylint: disable=line-too-long
name='exponential',
initial_lr=0.008,
decay_epochs=2.4,
decay_rate=0.97,
warmup_epochs=5,
scale_by_batch_size=1. / 128.)
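# The `model_params` dict above mirrors the signature of
# `EfficientNet.from_name` (the docstring notes these values are passed to
# that function), i.e. roughly:
#
#   model = EfficientNet.from_name(**config.model_params)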
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions for EfficientNet model.
[1] Mingxing Tan, Quoc V. Le
EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks.
ICML'19, https://arxiv.org/abs/1905.11946
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import os
from typing import Any, Dict, Optional, Text, Tuple
from absl import logging
from dataclasses import dataclass
import tensorflow.compat.v2 as tf
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
from official.vision.image_classification import preprocessing
from official.vision.image_classification.efficientnet import common_modules
@dataclass
class BlockConfig(base_config.Config):
"""Config for a single MB Conv Block."""
input_filters: int = 0
output_filters: int = 0
kernel_size: int = 3
num_repeat: int = 1
expand_ratio: int = 1
strides: Tuple[int, int] = (1, 1)
se_ratio: Optional[float] = None
id_skip: bool = True
fused_conv: bool = False
conv_type: str = 'depthwise'
@dataclass
class ModelConfig(base_config.Config):
"""Default Config for Efficientnet-B0."""
width_coefficient: float = 1.0
depth_coefficient: float = 1.0
resolution: int = 224
dropout_rate: float = 0.2
blocks: Tuple[BlockConfig, ...] = (
# (input_filters, output_filters, kernel_size, num_repeat,
# expand_ratio, strides, se_ratio)
# pylint: disable=bad-whitespace
BlockConfig.from_args(32, 16, 3, 1, 1, (1, 1), 0.25),
BlockConfig.from_args(16, 24, 3, 2, 6, (2, 2), 0.25),
BlockConfig.from_args(24, 40, 5, 2, 6, (2, 2), 0.25),
BlockConfig.from_args(40, 80, 3, 3, 6, (2, 2), 0.25),
BlockConfig.from_args(80, 112, 5, 3, 6, (1, 1), 0.25),
BlockConfig.from_args(112, 192, 5, 4, 6, (2, 2), 0.25),
BlockConfig.from_args(192, 320, 3, 1, 6, (1, 1), 0.25),
# pylint: enable=bad-whitespace
)
stem_base_filters: int = 32
top_base_filters: int = 1280
activation: str = 'simple_swish'
batch_norm: str = 'default'
bn_momentum: float = 0.99
bn_epsilon: float = 1e-3
# While the original implementation used a weight decay of 1e-5,
# tf.nn.l2_loss divides it by 2, so we halve this to compensate in Keras
weight_decay: float = 5e-6
drop_connect_rate: float = 0.2
depth_divisor: int = 8
min_depth: Optional[int] = None
use_se: bool = True
input_channels: int = 3
num_classes: int = 1000
model_name: str = 'efficientnet'
rescale_input: bool = True
data_format: str = 'channels_last'
dtype: str = 'float32'
MODEL_CONFIGS = {
# (width, depth, resolution, dropout)
'efficientnet-b0': ModelConfig.from_args(1.0, 1.0, 224, 0.2),
'efficientnet-b1': ModelConfig.from_args(1.0, 1.1, 240, 0.2),
'efficientnet-b2': ModelConfig.from_args(1.1, 1.2, 260, 0.3),
'efficientnet-b3': ModelConfig.from_args(1.2, 1.4, 300, 0.3),
'efficientnet-b4': ModelConfig.from_args(1.4, 1.8, 380, 0.4),
'efficientnet-b5': ModelConfig.from_args(1.6, 2.2, 456, 0.4),
'efficientnet-b6': ModelConfig.from_args(1.8, 2.6, 528, 0.5),
'efficientnet-b7': ModelConfig.from_args(2.0, 3.1, 600, 0.5),
}
CONV_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 2.0,
'mode': 'fan_out',
# Note: this is a truncated normal distribution
'distribution': 'normal'
}
}
DENSE_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 1 / 3.0,
'mode': 'fan_out',
'distribution': 'uniform'
}
}
def round_filters(filters: int,
config: ModelConfig) -> int:
"""Round number of filters based on width coefficient."""
width_coefficient = config.width_coefficient
min_depth = config.min_depth
divisor = config.depth_divisor
orig_filters = filters
if not width_coefficient:
return filters
filters *= width_coefficient
min_depth = min_depth or divisor
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
  # Make sure that rounding down does not reduce the filter count by more
  # than 10%.
if new_filters < 0.9 * filters:
new_filters += divisor
logging.info('round_filter input=%s output=%s', orig_filters, new_filters)
return int(new_filters)
def round_repeats(repeats: int, depth_coefficient: float) -> int:
"""Round number of repeats based on depth coefficient."""
return int(math.ceil(depth_coefficient * repeats))
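# Worked example (illustrative), using the efficientnet-b2 coefficients from
# MODEL_CONFIGS above (width 1.1, depth 1.2, divisor 8):
#   round_filters(32, config): 32 * 1.1 = 35.2, rounded to the nearest
#     multiple of 8 gives 32; since 32 >= 0.9 * 35.2, 32 is kept.
#   round_repeats(2, 1.2): ceil(2 * 1.2) = 3 block repeats.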
def conv2d_block(inputs: tf.Tensor,
conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Any = None,
depthwise: bool = False,
name: Text = None):
"""A conv2d followed by batch norm and an activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm)
bn_momentum = config.bn_momentum
bn_epsilon = config.bn_epsilon
data_format = config.data_format
weight_decay = config.weight_decay
name = name or ''
# Collect args based on what kind of conv2d block is desired
init_kwargs = {
'kernel_size': kernel_size,
'strides': strides,
'use_bias': use_bias,
'padding': 'same',
'name': name + '_conv2d',
'kernel_regularizer': tf.keras.regularizers.l2(weight_decay),
'bias_regularizer': tf.keras.regularizers.l2(weight_decay),
}
if depthwise:
conv2d = tf.keras.layers.DepthwiseConv2D
init_kwargs.update({'depthwise_initializer': CONV_KERNEL_INITIALIZER})
else:
conv2d = tf.keras.layers.Conv2D
init_kwargs.update({'filters': conv_filters,
'kernel_initializer': CONV_KERNEL_INITIALIZER})
x = conv2d(**init_kwargs)(inputs)
if use_batch_norm:
bn_axis = 1 if data_format == 'channels_first' else -1
x = batch_norm(axis=bn_axis,
momentum=bn_momentum,
epsilon=bn_epsilon,
name=name + '_bn')(x)
if activation is not None:
x = tf.keras.layers.Activation(activation,
name=name + '_activation')(x)
return x
def mb_conv_block(inputs: tf.Tensor,
block: BlockConfig,
config: ModelConfig,
prefix: Text = None):
"""Mobile Inverted Residual Bottleneck.
Args:
inputs: the Keras input to the block
block: BlockConfig, arguments to create a Block
config: ModelConfig, a set of model parameters
prefix: prefix for naming all layers
Returns:
the output of the block
"""
use_se = config.use_se
activation = tf_utils.get_activation(config.activation)
drop_connect_rate = config.drop_connect_rate
data_format = config.data_format
use_depthwise = block.conv_type != 'no_depthwise'
prefix = prefix or ''
filters = block.input_filters * block.expand_ratio
x = inputs
if block.fused_conv:
# If we use fused mbconv, skip expansion and use regular conv.
x = conv2d_block(x,
filters,
config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
name=prefix + 'fused')
else:
if block.expand_ratio != 1:
# Expansion phase
kernel_size = (1, 1) if use_depthwise else (3, 3)
x = conv2d_block(x,
filters,
config,
kernel_size=kernel_size,
activation=activation,
name=prefix + 'expand')
# Depthwise Convolution
if use_depthwise:
x = conv2d_block(x,
conv_filters=None,
config=config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
depthwise=True,
name=prefix + 'depthwise')
# Squeeze and Excitation phase
if use_se:
assert block.se_ratio is not None
assert 0 < block.se_ratio <= 1
num_reduced_filters = max(1, int(
block.input_filters * block.se_ratio
))
if data_format == 'channels_first':
se_shape = (filters, 1, 1)
else:
se_shape = (1, 1, filters)
se = tf.keras.layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)
se = tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')(se)
se = conv2d_block(se,
num_reduced_filters,
config,
use_bias=True,
use_batch_norm=False,
activation=activation,
name=prefix + 'se_reduce')
se = conv2d_block(se,
filters,
config,
use_bias=True,
use_batch_norm=False,
activation='sigmoid',
name=prefix + 'se_expand')
x = tf.keras.layers.multiply([x, se], name=prefix + 'se_excite')
# Output phase
x = conv2d_block(x,
block.output_filters,
config,
activation=None,
name=prefix + 'project')
# Add identity so that quantization-aware training can insert quantization
# ops correctly.
x = tf.keras.layers.Activation(tf_utils.get_activation('identity'),
name=prefix + 'id')(x)
if (block.id_skip
and all(s == 1 for s in block.strides)
and block.input_filters == block.output_filters):
if drop_connect_rate and drop_connect_rate > 0:
# Apply dropconnect
# The only difference between dropout and dropconnect in TF is scaling by
# drop_connect_rate during training. See:
# https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
x = tf.keras.layers.Dropout(drop_connect_rate,
noise_shape=(None, 1, 1, 1),
name=prefix + 'drop')(x)
x = tf.keras.layers.add([x, inputs], name=prefix + 'add')
return x
def efficientnet(image_input: tf.keras.layers.Input,
config: ModelConfig):
"""Creates an EfficientNet graph given the model parameters.
This function is wrapped by the `EfficientNet` class to make a tf.keras.Model.
Args:
image_input: the input batch of images
config: the model config
Returns:
the output of efficientnet
"""
depth_coefficient = config.depth_coefficient
blocks = config.blocks
stem_base_filters = config.stem_base_filters
top_base_filters = config.top_base_filters
activation = tf_utils.get_activation(config.activation)
dropout_rate = config.dropout_rate
drop_connect_rate = config.drop_connect_rate
num_classes = config.num_classes
input_channels = config.input_channels
rescale_input = config.rescale_input
data_format = config.data_format
dtype = config.dtype
weight_decay = config.weight_decay
x = image_input
if rescale_input:
x = preprocessing.normalize_images(x,
num_channels=input_channels,
dtype=dtype,
data_format=data_format)
# Build stem
x = conv2d_block(x,
round_filters(stem_base_filters, config),
config,
kernel_size=[3, 3],
strides=[2, 2],
activation=activation,
name='stem')
# Build blocks
num_blocks_total = sum(block.num_repeat for block in blocks)
block_num = 0
for stack_idx, block in enumerate(blocks):
assert block.num_repeat > 0
# Update block input and output filters based on depth multiplier
block = block.replace(
input_filters=round_filters(block.input_filters, config),
output_filters=round_filters(block.output_filters, config),
num_repeat=round_repeats(block.num_repeat, depth_coefficient))
# The first block needs to take care of stride and filter size increase
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_0/'.format(stack_idx)
x = mb_conv_block(x, block, config, block_prefix)
block_num += 1
if block.num_repeat > 1:
block = block.replace(
input_filters=block.output_filters,
strides=[1, 1]
)
for block_idx in range(block.num_repeat - 1):
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_{}/'.format(stack_idx, block_idx + 1)
x = mb_conv_block(x, block, config, prefix=block_prefix)
block_num += 1
# Build top
x = conv2d_block(x,
round_filters(top_base_filters, config),
config,
activation=activation,
name='top')
# Build classifier
x = tf.keras.layers.GlobalAveragePooling2D(name='top_pool')(x)
if dropout_rate and dropout_rate > 0:
x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
x = tf.keras.layers.Dense(
num_classes,
kernel_initializer=DENSE_KERNEL_INITIALIZER,
kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
bias_regularizer=tf.keras.regularizers.l2(weight_decay),
name='logits')(x)
x = tf.keras.layers.Activation('softmax', name='probs')(x)
return x
@tf.keras.utils.register_keras_serializable(package='Vision')
class EfficientNet(tf.keras.Model):
"""Wrapper class for an EfficientNet Keras model.
Contains helper methods to build, manage, and save metadata about the model.
"""
def __init__(self,
config: ModelConfig = None,
overrides: Dict[Text, Any] = None):
"""Create an EfficientNet model.
Args:
config: (optional) the main model parameters to create the model
overrides: (optional) a dict containing keys that can override
config
"""
overrides = overrides or {}
config = config or ModelConfig()
self.config = config.replace(**overrides)
input_channels = self.config.input_channels
model_name = self.config.model_name
input_shape = (None, None, input_channels) # Should handle any size image
image_input = tf.keras.layers.Input(shape=input_shape)
output = efficientnet(image_input, self.config)
# Cast to float32 in case we have a different model dtype
output = tf.cast(output, tf.float32)
logging.info('Building model %s with params %s',
model_name,
self.config)
super(EfficientNet, self).__init__(
inputs=image_input, outputs=output, name=model_name)
@classmethod
def from_name(cls,
model_name: Text,
model_weights_path: Text = None,
copy_to_local: bool = False,
overrides: Dict[Text, Any] = None):
"""Construct an EfficientNet model from a predefined model name.
E.g., `EfficientNet.from_name('efficientnet-b0')`.
Args:
model_name: the predefined model name
model_weights_path: the path to the weights (h5 file or saved model dir)
copy_to_local: copy the weights to a local tmp dir
overrides: (optional) a dict containing keys that can override config
Returns:
A constructed EfficientNet instance.
"""
model_configs = dict(MODEL_CONFIGS)
overrides = dict(overrides) if overrides else {}
# One can define their own custom models if necessary
model_configs.update(overrides.pop('model_config', {}))
if model_name not in model_configs:
raise ValueError('Unknown model name {}'.format(model_name))
config = model_configs[model_name]
model = cls(config=config, overrides=overrides)
if model_weights_path:
if copy_to_local:
tmp_file = os.path.join('/tmp', model_name + '.h5')
model_weights_file = os.path.join(model_weights_path, 'model.h5')
tf.io.gfile.copy(model_weights_file, tmp_file, overwrite=True)
model_weights_path = tmp_file
model.load_weights(model_weights_path)
return model
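# Usage sketch (the weights path is hypothetical):
#
#   model = EfficientNet.from_name('efficientnet-b0')
#   model = EfficientNet.from_name(
#       'efficientnet-b0',
#       model_weights_path='/path/to/weights',
#       overrides={'num_classes': 10, 'rescale_input': False})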
# Lint as: python3
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Learning rate utilities for vision tasks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from typing import Any, List, Mapping
import tensorflow.compat.v2 as tf
BASE_LEARNING_RATE = 0.1
class WarmupDecaySchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
"""A wrapper for LearningRateSchedule that includes warmup steps."""
def __init__(
self,
lr_schedule: tf.keras.optimizers.schedules.LearningRateSchedule,
warmup_steps: int):
"""Add warmup decay to a learning rate schedule.
Args:
lr_schedule: base learning rate scheduler
warmup_steps: number of warmup steps
"""
super(WarmupDecaySchedule, self).__init__()
self._lr_schedule = lr_schedule
self._warmup_steps = warmup_steps
def __call__(self, step: int):
lr = self._lr_schedule(step)
if self._warmup_steps:
initial_learning_rate = tf.convert_to_tensor(
self._lr_schedule.initial_learning_rate, name="initial_learning_rate")
dtype = initial_learning_rate.dtype
global_step_recomp = tf.cast(step, dtype)
warmup_steps = tf.cast(self._warmup_steps, dtype)
warmup_lr = initial_learning_rate * global_step_recomp / warmup_steps
lr = tf.cond(global_step_recomp < warmup_steps,
lambda: warmup_lr,
lambda: lr)
return lr
def get_config(self) -> Mapping[str, Any]:
config = self._lr_schedule.get_config()
config.update({
"warmup_steps": self._warmup_steps,
})
return config
# TODO(b/149030439) - refactor this with
# tf.keras.optimizers.schedules.PiecewiseConstantDecay + WarmupDecaySchedule.
class PiecewiseConstantDecayWithWarmup(
tf.keras.optimizers.schedules.LearningRateSchedule):
"""Piecewise constant decay with warmup schedule."""
def __init__(self,
batch_size: int,
epoch_size: int,
warmup_epochs: int,
boundaries: List[int],
multipliers: List[float]):
"""Piecewise constant decay with warmup.
Args:
batch_size: The training batch size used in the experiment.
epoch_size: The size of an epoch, or the number of examples in an epoch.
warmup_epochs: The number of warmup epochs to apply.
      boundaries: The list of epoch boundaries, with strictly increasing
        entries.
      multipliers: The list of multipliers/learning rates to use for the
        piecewise portion. The length must be one more than that of boundaries.
"""
super(PiecewiseConstantDecayWithWarmup, self).__init__()
if len(boundaries) != len(multipliers) - 1:
raise ValueError("The length of boundaries must be 1 less than the "
"length of multipliers")
base_lr_batch_size = 256
steps_per_epoch = epoch_size // batch_size
self._rescaled_lr = BASE_LEARNING_RATE * batch_size / base_lr_batch_size
self._step_boundaries = [float(steps_per_epoch) * x for x in boundaries]
self._lr_values = [self._rescaled_lr * m for m in multipliers]
self._warmup_steps = warmup_epochs * steps_per_epoch
def __call__(self, step: int):
"""Compute learning rate at given step."""
def warmup_lr():
return self._rescaled_lr * (
step / tf.cast(self._warmup_steps, tf.float32))
def piecewise_lr():
return tf.compat.v1.train.piecewise_constant(
tf.cast(step, tf.float32), self._step_boundaries, self._lr_values)
return tf.cond(step < self._warmup_steps, warmup_lr, piecewise_lr)
def get_config(self) -> Mapping[str, Any]:
return {
"rescaled_lr": self._rescaled_lr,
"step_boundaries": self._step_boundaries,
"lr_values": self._lr_values,
"warmup_steps": self._warmup_steps,
}
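# A minimal usage sketch for the two schedules above. The values are
# illustrative only, and the block runs only when this module is executed
# directly.
if __name__ == '__main__':
  base = tf.keras.optimizers.schedules.ExponentialDecay(
      initial_learning_rate=0.1, decay_steps=1000, decay_rate=0.97)
  warmup = WarmupDecaySchedule(lr_schedule=base, warmup_steps=500)
  # During warmup the rate ramps linearly: 0.1 * 250 / 500 = 0.05.
  print(float(warmup(250)))
  piecewise = PiecewiseConstantDecayWithWarmup(
      batch_size=256, epoch_size=256 * 100, warmup_epochs=1,
      boundaries=[30, 60], multipliers=[1.0, 0.1, 0.01])
  # Past warmup (steps_per_epoch = 100), the rescaled base rate applies:
  # BASE_LEARNING_RATE * 256 / 256 = 0.1 until step 3000.
  print(float(piecewise(200)))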
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for learning_rate."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v2 as tf
from official.vision.image_classification import learning_rate
class LearningRateTests(tf.test.TestCase):
def test_warmup_decay(self):
"""Basic computational test for warmup decay."""
initial_lr = 0.01
decay_steps = 100
decay_rate = 0.01
warmup_steps = 10
base_lr = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate=initial_lr,
decay_steps=decay_steps,
decay_rate=decay_rate)
lr = learning_rate.WarmupDecaySchedule(
lr_schedule=base_lr,
warmup_steps=warmup_steps)
for step in range(warmup_steps - 1):
config = lr.get_config()
self.assertEqual(config['warmup_steps'], warmup_steps)
self.assertAllClose(self.evaluate(lr(step)),
step / warmup_steps * initial_lr)
def test_piecewise_constant_decay_with_warmup(self):
"""Basic computational test for piecewise constant decay with warmup."""
boundaries = [1, 2, 3]
warmup_epochs = boundaries[0]
learning_rate_multipliers = [1.0, 0.1, 0.001]
expected_keys = [
'rescaled_lr', 'step_boundaries', 'lr_values', 'warmup_steps',
]
expected_lrs = [0.0, 0.1, 0.1]
lr = learning_rate.PiecewiseConstantDecayWithWarmup(
batch_size=256,
epoch_size=256,
warmup_epochs=warmup_epochs,
boundaries=boundaries[1:],
multipliers=learning_rate_multipliers)
step = 0
config = lr.get_config()
self.assertAllInSet(list(config.keys()), expected_keys)
for boundary, expected_lr in zip(boundaries, expected_lrs):
for _ in range(step, boundary):
self.assertAllClose(self.evaluate(lr(step)), expected_lr)
step += 1
def test_piecewise_constant_decay_invalid_boundaries(self):
with self.assertRaisesRegex(ValueError,
'The length of boundaries must be 1 less '):
learning_rate.PiecewiseConstantDecayWithWarmup(
batch_size=256,
epoch_size=256,
warmup_epochs=1,
boundaries=[1, 2],
multipliers=[1, 2])
if __name__ == '__main__':
assert tf.version.VERSION.startswith('2.')
tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Optimizer factory for vision tasks."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
from absl import logging
import tensorflow.compat.v2 as tf
import tensorflow_addons as tfa
from typing import Any, Dict, Text
from official.vision.image_classification import learning_rate
from official.vision.image_classification.configs import base_configs
def build_optimizer(
optimizer_name: Text,
base_learning_rate: tf.keras.optimizers.schedules.LearningRateSchedule,
params: Dict[Text, Any]):
"""Build the optimizer based on name.
Args:
optimizer_name: String representation of the optimizer name. Examples:
sgd, momentum, rmsprop.
base_learning_rate: `tf.keras.optimizers.schedules.LearningRateSchedule`
base learning rate.
    params: String -> Any dictionary representing the optimizer params.
      This should contain optimizer-specific parameters such as
      `momentum`, `nesterov`, `decay`, etc.
Returns:
A tf.keras.Optimizer.
Raises:
ValueError if the provided optimizer_name is not supported.
"""
optimizer_name = optimizer_name.lower()
logging.info('Building %s optimizer with params %s', optimizer_name, params)
if optimizer_name == 'sgd':
logging.info('Using SGD optimizer')
nesterov = params.get('nesterov', False)
optimizer = tf.keras.optimizers.SGD(learning_rate=base_learning_rate,
nesterov=nesterov)
elif optimizer_name == 'momentum':
logging.info('Using momentum optimizer')
nesterov = params.get('nesterov', False)
optimizer = tf.keras.optimizers.SGD(learning_rate=base_learning_rate,
momentum=params['momentum'],
nesterov=nesterov)
elif optimizer_name == 'rmsprop':
logging.info('Using RMSProp')
rho = params.get('decay', None) or params.get('rho', 0.9)
momentum = params.get('momentum', 0.9)
epsilon = params.get('epsilon', 1e-07)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate,
rho=rho,
momentum=momentum,
epsilon=epsilon)
elif optimizer_name == 'adam':
logging.info('Using Adam')
beta_1 = params.get('beta_1', 0.9)
beta_2 = params.get('beta_2', 0.999)
epsilon = params.get('epsilon', 1e-07)
optimizer = tf.keras.optimizers.Adam(learning_rate=base_learning_rate,
beta_1=beta_1,
beta_2=beta_2,
epsilon=epsilon)
elif optimizer_name == 'adamw':
logging.info('Using AdamW')
weight_decay = params.get('weight_decay', 0.01)
beta_1 = params.get('beta_1', 0.9)
beta_2 = params.get('beta_2', 0.999)
epsilon = params.get('epsilon', 1e-07)
optimizer = tfa.optimizers.AdamW(weight_decay=weight_decay,
learning_rate=base_learning_rate,
beta_1=beta_1,
beta_2=beta_2,
epsilon=epsilon)
else:
raise ValueError('Unknown optimizer %s' % optimizer_name)
moving_average_decay = params.get('moving_average_decay', 0.)
if moving_average_decay is not None and moving_average_decay > 0.:
logging.info('Including moving average decay.')
optimizer = tfa.optimizers.MovingAverage(
optimizer,
average_decay=params['moving_average_decay'],
num_updates=None)
if params.get('lookahead', None):
logging.info('Using lookahead optimizer.')
optimizer = tfa.optimizers.Lookahead(optimizer)
return optimizer
def build_learning_rate(params: base_configs.LearningRateConfig,
batch_size: int = None,
train_steps: int = None):
"""Build the learning rate given the provided configuration."""
decay_type = params.name
base_lr = params.initial_lr
decay_rate = params.decay_rate
if params.decay_epochs is not None:
decay_steps = params.decay_epochs * train_steps
else:
decay_steps = 0
if params.warmup_epochs is not None:
warmup_steps = params.warmup_epochs * train_steps
else:
warmup_steps = 0
lr_multiplier = params.scale_by_batch_size
if lr_multiplier and lr_multiplier > 0:
# Scale the learning rate based on the batch size and a multiplier
base_lr *= lr_multiplier * batch_size
logging.info('Scaling the learning rate based on the batch size '
'multiplier. New base_lr: %f', base_lr)
if decay_type == 'exponential':
logging.info('Using exponential learning rate with: '
'initial_learning_rate: %f, decay_steps: %d, '
'decay_rate: %f', base_lr, decay_steps, decay_rate)
lr = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate=base_lr,
decay_steps=decay_steps,
decay_rate=decay_rate)
elif decay_type == 'piecewise_constant_with_warmup':
logging.info('Using Piecewise constant decay with warmup. '
'Parameters: batch_size: %d, epoch_size: %d, '
'warmup_epochs: %d, boundaries: %s, multipliers: %s',
batch_size, params.examples_per_epoch,
params.warmup_epochs, params.boundaries,
params.multipliers)
lr = learning_rate.PiecewiseConstantDecayWithWarmup(
batch_size=batch_size,
epoch_size=params.examples_per_epoch,
warmup_epochs=params.warmup_epochs,
boundaries=params.boundaries,
multipliers=params.multipliers)
if warmup_steps > 0:
if decay_type != 'piecewise_constant_with_warmup':
logging.info('Applying %d warmup steps to the learning rate',
warmup_steps)
lr = learning_rate.WarmupDecaySchedule(lr, warmup_steps)
return lr
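# A minimal usage sketch with illustrative parameter values; it runs only when
# this module is executed directly.
if __name__ == '__main__':
  schedule = tf.keras.optimizers.schedules.ExponentialDecay(
      initial_learning_rate=0.1, decay_steps=1000, decay_rate=0.9)
  opt = build_optimizer(
      optimizer_name='momentum',
      base_learning_rate=schedule,
      params={'momentum': 0.9, 'nesterov': True})
  print(type(opt).__name__)  # SGD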
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimizer_factory."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import tensorflow.compat.v2 as tf
from absl.testing import parameterized
from official.vision.image_classification import optimizer_factory
from official.vision.image_classification.configs import base_configs
class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
('sgd', 'sgd', 0., False),
('momentum', 'momentum', 0., False),
('rmsprop', 'rmsprop', 0., False),
('adam', 'adam', 0., False),
('adamw', 'adamw', 0., False),
('momentum_lookahead', 'momentum', 0., True),
('sgd_ema', 'sgd', 0.001, False),
('momentum_ema', 'momentum', 0.001, False),
('rmsprop_ema', 'rmsprop', 0.001, False))
def test_optimizer(self, optimizer_name, moving_average_decay, lookahead):
"""Smoke test to be sure no syntax errors."""
params = {
'learning_rate': 0.001,
'rho': 0.09,
'momentum': 0.,
'epsilon': 1e-07,
'moving_average_decay': moving_average_decay,
'lookahead': lookahead,
}
optimizer = optimizer_factory.build_optimizer(
optimizer_name=optimizer_name,
base_learning_rate=params['learning_rate'],
params=params)
self.assertTrue(issubclass(type(optimizer), tf.keras.optimizers.Optimizer))
def test_unknown_optimizer(self):
with self.assertRaises(ValueError):
optimizer_factory.build_optimizer(
optimizer_name='this_optimizer_does_not_exist',
base_learning_rate=None,
params=None)
def test_learning_rate_without_decay_or_warmups(self):
params = base_configs.LearningRateConfig(
name='exponential',
initial_lr=0.01,
decay_rate=0.01,
decay_epochs=None,
warmup_epochs=None,
scale_by_batch_size=0.01,
examples_per_epoch=1,
boundaries=[0],
multipliers=[0, 1])
batch_size = 1
train_steps = 1
lr = optimizer_factory.build_learning_rate(
params=params,
batch_size=batch_size,
train_steps=train_steps)
self.assertTrue(
issubclass(
type(lr), tf.keras.optimizers.schedules.LearningRateSchedule))
@parameterized.named_parameters(
('exponential', 'exponential'),
('piecewise_constant_with_warmup', 'piecewise_constant_with_warmup'))
def test_learning_rate_with_decay_and_warmup(self, lr_decay_type):
"""Basic smoke test for syntax."""
params = base_configs.LearningRateConfig(
name=lr_decay_type,
initial_lr=0.01,
decay_rate=0.01,
decay_epochs=1,
warmup_epochs=1,
scale_by_batch_size=0.01,
examples_per_epoch=1,
boundaries=[0],
multipliers=[0, 1])
batch_size = 1
train_steps = 1
lr = optimizer_factory.build_learning_rate(
params=params,
batch_size=batch_size,
train_steps=train_steps)
self.assertTrue(
issubclass(
type(lr), tf.keras.optimizers.schedules.LearningRateSchedule))
if __name__ == '__main__':
assert tf.version.VERSION.startswith('2.')
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocessing functions for images."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import tensorflow.compat.v2 as tf
from typing import List, Optional, Text, Tuple
from official.vision.image_classification import augment
# Calculated from the ImageNet training set
MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)
STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
IMAGE_SIZE = 224
CROP_PADDING = 32
def mean_image_subtraction(
image_bytes: tf.Tensor,
means: Tuple[float, ...],
num_channels: int = 3,
dtype: tf.dtypes.DType = tf.float32,
) -> tf.Tensor:
"""Subtracts the given means from each image channel.
For example:
means = [123.68, 116.779, 103.939]
image_bytes = mean_image_subtraction(image_bytes, means)
Note that the rank of `image` must be known.
Args:
image_bytes: a tensor of size [height, width, C].
means: a C-vector of values to subtract from each channel.
num_channels: number of color channels in the image that will be distorted.
dtype: the dtype to convert the images to. Set to `None` to skip conversion.
Returns:
the centered image.
Raises:
ValueError: If the rank of `image` is unknown, if `image` has a rank other
than three or if the number of channels in `image` doesn't match the
number of values in `means`.
"""
if image_bytes.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
if len(means) != num_channels:
raise ValueError('len(means) must match the number of channels')
# We have a 1-D tensor of means; convert to 3-D.
# Note(b/130245863): we explicitly call `broadcast` instead of simply
# expanding dimensions for better performance.
means = tf.broadcast_to(means, tf.shape(image_bytes))
if dtype is not None:
means = tf.cast(means, dtype=dtype)
return image_bytes - means
def standardize_image(
image_bytes: tf.Tensor,
stddev: Tuple[float, ...],
num_channels: int = 3,
dtype: tf.dtypes.DType = tf.float32,
) -> tf.Tensor:
"""Divides the given stddev from each image channel.
For example:
stddev = [123.68, 116.779, 103.939]
image_bytes = standardize_image(image_bytes, stddev)
Note that the rank of `image` must be known.
Args:
image_bytes: a tensor of size [height, width, C].
    stddev: a C-vector of values to divide each channel by.
num_channels: number of color channels in the image that will be distorted.
dtype: the dtype to convert the images to. Set to `None` to skip conversion.
Returns:
    the standardized image.
Raises:
ValueError: If the rank of `image` is unknown, if `image` has a rank other
than three or if the number of channels in `image` doesn't match the
number of values in `stddev`.
"""
if image_bytes.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
if len(stddev) != num_channels:
raise ValueError('len(stddev) must match the number of channels')
# We have a 1-D tensor of stddev; convert to 3-D.
# Note(b/130245863): we explicitly call `broadcast` instead of simply
# expanding dimensions for better performance.
stddev = tf.broadcast_to(stddev, tf.shape(image_bytes))
if dtype is not None:
stddev = tf.cast(stddev, dtype=dtype)
return image_bytes / stddev
def normalize_images(features: tf.Tensor,
mean_rgb: Tuple[float, ...] = MEAN_RGB,
stddev_rgb: Tuple[float, ...] = STDDEV_RGB,
num_channels: int = 3,
dtype: tf.dtypes.DType = tf.float32,
data_format: Text = 'channels_last') -> tf.Tensor:
"""Normalizes the input image channels with the given mean and stddev.
Args:
features: `Tensor` representing decoded images in float format.
mean_rgb: the mean of the channels to subtract.
stddev_rgb: the stddev of the channels to divide.
num_channels: the number of channels in the input image tensor.
dtype: the dtype to convert the images to. Set to `None` to skip conversion.
data_format: the format of the input image tensor
['channels_first', 'channels_last'].
Returns:
A normalized image `Tensor`.
"""
# TODO(allencwang) - figure out how to use mean_image_subtraction and
# standardize_image on batches of images and replace the following.
if data_format == 'channels_first':
stats_shape = [num_channels, 1, 1]
else:
stats_shape = [1, 1, num_channels]
if dtype is not None:
features = tf.image.convert_image_dtype(features, dtype=dtype)
if mean_rgb is not None:
mean_rgb = tf.constant(mean_rgb,
shape=stats_shape,
dtype=features.dtype)
mean_rgb = tf.broadcast_to(mean_rgb, tf.shape(features))
features = features - mean_rgb
if stddev_rgb is not None:
stddev_rgb = tf.constant(stddev_rgb,
shape=stats_shape,
dtype=features.dtype)
stddev_rgb = tf.broadcast_to(stddev_rgb, tf.shape(features))
features = features / stddev_rgb
return features
def decode_and_center_crop(image_bytes: tf.Tensor,
image_size: int = IMAGE_SIZE,
crop_padding: int = CROP_PADDING) -> tf.Tensor:
"""Crops to center of image with padding then scales image_size.
Args:
image_bytes: `Tensor` representing an image binary of arbitrary size.
image_size: image height/width dimension.
crop_padding: the padding size to use when centering the crop.
Returns:
A decoded and cropped image `Tensor`.
"""
decoded = image_bytes.dtype != tf.string
shape = (tf.shape(image_bytes) if decoded
else tf.image.extract_jpeg_shape(image_bytes))
image_height = shape[0]
image_width = shape[1]
padded_center_crop_size = tf.cast(
((image_size / (image_size + crop_padding)) *
tf.cast(tf.minimum(image_height, image_width), tf.float32)),
tf.int32)
offset_height = ((image_height - padded_center_crop_size) + 1) // 2
offset_width = ((image_width - padded_center_crop_size) + 1) // 2
crop_window = tf.stack([offset_height, offset_width,
padded_center_crop_size, padded_center_crop_size])
if decoded:
image = tf.image.crop_to_bounding_box(
image_bytes,
offset_height=offset_height,
offset_width=offset_width,
target_height=padded_center_crop_size,
target_width=padded_center_crop_size)
else:
image = tf.image.decode_and_crop_jpeg(image_bytes, crop_window, channels=3)
image = resize_image(image_bytes=image,
height=image_size,
width=image_size)
return image
def decode_crop_and_flip(image_bytes: tf.Tensor) -> tf.Tensor:
"""Crops an image to a random part of the image, then randomly flips.
Args:
image_bytes: `Tensor` representing an image binary of arbitrary size.
Returns:
A decoded and cropped image `Tensor`.
"""
decoded = image_bytes.dtype != tf.string
bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
shape = (tf.shape(image_bytes) if decoded
else tf.image.extract_jpeg_shape(image_bytes))
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
shape,
bounding_boxes=bbox,
min_object_covered=0.1,
aspect_ratio_range=[0.75, 1.33],
area_range=[0.05, 1.0],
max_attempts=100,
use_image_if_no_bounding_boxes=True)
bbox_begin, bbox_size, _ = sample_distorted_bounding_box
# Reassemble the bounding box in the format the crop op requires.
offset_height, offset_width, _ = tf.unstack(bbox_begin)
target_height, target_width, _ = tf.unstack(bbox_size)
crop_window = tf.stack([offset_height, offset_width,
target_height, target_width])
if decoded:
cropped = tf.image.crop_to_bounding_box(
image_bytes,
offset_height=offset_height,
offset_width=offset_width,
target_height=target_height,
target_width=target_width)
else:
cropped = tf.image.decode_and_crop_jpeg(image_bytes,
crop_window,
channels=3)
# Flip to add a little more random distortion in.
cropped = tf.image.random_flip_left_right(cropped)
return cropped
def resize_image(image_bytes: tf.Tensor,
height: int = IMAGE_SIZE,
width: int = IMAGE_SIZE) -> tf.Tensor:
"""Resizes an image to a given height and width.
Args:
image_bytes: `Tensor` representing an image binary of arbitrary size.
height: image height dimension.
width: image width dimension.
Returns:
A tensor containing the resized image.
"""
return tf.compat.v1.image.resize(
image_bytes, [height, width], method=tf.image.ResizeMethod.BILINEAR,
align_corners=False)
def preprocess_for_eval(
image_bytes: tf.Tensor,
image_size: int = IMAGE_SIZE,
num_channels: int = 3,
mean_subtract: bool = False,
standardize: bool = False,
dtype: tf.dtypes.DType = tf.float32
) -> tf.Tensor:
"""Preprocesses the given image for evaluation.
Args:
image_bytes: `Tensor` representing an image binary of arbitrary size.
image_size: image height/width dimension.
num_channels: number of image input channels.
mean_subtract: whether or not to apply mean subtraction.
standardize: whether or not to apply standardization.
dtype: the dtype to convert the images to. Set to `None` to skip conversion.
Returns:
A preprocessed and normalized image `Tensor`.
"""
images = decode_and_center_crop(image_bytes, image_size)
images = tf.reshape(images, [image_size, image_size, num_channels])
if mean_subtract:
images = mean_image_subtraction(image_bytes=images, means=MEAN_RGB)
if standardize:
images = standardize_image(image_bytes=images, stddev=STDDEV_RGB)
if dtype is not None:
images = tf.image.convert_image_dtype(images, dtype=dtype)
return images
def load_eval_image(filename: Text, image_size: int = IMAGE_SIZE) -> tf.Tensor:
"""Reads an image from the filesystem and applies image preprocessing.
Args:
filename: a filename path of an image.
image_size: image height/width dimension.
Returns:
A preprocessed and normalized image `Tensor`.
"""
image_bytes = tf.io.read_file(filename)
image = preprocess_for_eval(image_bytes, image_size)
return image
def build_eval_dataset(filenames: List[Text],
                       labels: List[int] = None,
                       image_size: int = IMAGE_SIZE,
                       batch_size: int = 1) -> tf.data.Dataset:
  """Builds a tf.data.Dataset from a list of filenames and labels.
  Args:
    filenames: a list of filename paths of images.
    labels: a list of labels corresponding to each image.
    image_size: image height/width dimension.
    batch_size: the batch size used by the dataset.
  Returns:
    A `tf.data.Dataset` of preprocessed and normalized images and their labels.
  """
if labels is None:
labels = [0] * len(filenames)
filenames = tf.constant(filenames)
labels = tf.constant(labels)
dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
dataset = dataset.map(
lambda filename, label: (load_eval_image(filename, image_size), label))
dataset = dataset.batch(batch_size)
return dataset
def preprocess_for_train(image_bytes: tf.Tensor,
image_size: int = IMAGE_SIZE,
augmenter: Optional[augment.ImageAugment] = None,
mean_subtract: bool = False,
standardize: bool = False,
dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
"""Preprocesses the given image for training.
Args:
image_bytes: `Tensor` representing an image binary of
arbitrary size of dtype tf.uint8.
image_size: image height/width dimension.
augmenter: the image augmenter to apply.
mean_subtract: whether or not to apply mean subtraction.
standardize: whether or not to apply standardization.
dtype: the dtype to convert the images to. Set to `None` to skip conversion.
Returns:
A preprocessed and normalized image `Tensor`.
"""
images = decode_crop_and_flip(image_bytes=image_bytes)
images = resize_image(images, height=image_size, width=image_size)
if mean_subtract:
images = mean_image_subtraction(image_bytes=images, means=MEAN_RGB)
if standardize:
images = standardize_image(image_bytes=images, stddev=STDDEV_RGB)
if augmenter is not None:
images = augmenter.distort(images)
if dtype is not None:
images = tf.image.convert_image_dtype(images, dtype)
return images
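# A minimal usage sketch: a synthetic uint8 image stands in for a decoded
# JPEG; the block runs only when this module is executed directly.
if __name__ == '__main__':
  fake_image = tf.cast(
      tf.random.uniform([300, 400, 3], maxval=256, dtype=tf.int32), tf.uint8)
  out = preprocess_for_train(
      fake_image, image_size=IMAGE_SIZE, mean_subtract=True, standardize=True)
  print(out.shape)  # (224, 224, 3)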
This folder contains compile/fit and
[custom training loop (CTL)](#resnet-custom-training-loop) implementations for
ResNet50.
## Before you begin
Please refer to the [README](../README.md) in the parent directory for
information on setup and preparing the data.
## ResNet (custom training loop)
Similar to the [estimator implementation](../../../r1/resnet), the Keras
implementation has code for the ImageNet dataset. The ImageNet
version uses a ResNet50 model implemented in
[`resnet_model.py`](./resnet_model.py).
### Pretrained Models
* [ResNet50 Checkpoints](https://storage.googleapis.com/cloud-tpu-checkpoints/resnet/resnet50.tar.gz)
* ResNet50 TFHub: [feature vector](https://tfhub.dev/tensorflow/resnet_50/feature_vector/1)
and [classification](https://tfhub.dev/tensorflow/resnet_50/classification/1)
Once your dataset is ready, you can begin training the model as follows:
```bash
python3 resnet_imagenet_main.py
```
Again, if you did not download the data to the default directory, specify the
location with the `--data_dir` flag:
```bash
python3 resnet_imagenet_main.py --data_dir=/path/to/imagenet
```
There are more flag options you can specify. Here are some examples:
- `--use_synthetic_data`: when set to true, synthetic data, rather than real
  data, are used;
- `--batch_size`: the batch size used for the model;
- `--model_dir`: the directory to save the model checkpoint;
- `--train_epochs`: the number of epochs to run for training the model;
- `--train_steps`: the number of steps to run for training the model; only
  values smaller than the number of batches in an epoch are currently
  supported;
- `--skip_eval`: when set to true, evaluation as well as validation during
  training is skipped.
For example, this is a typical command line to run with ImageNet data with
batch size 128 per GPU:
```bash
python3 resnet_imagenet_main.py \
--model_dir=/tmp/model_dir/something \
--num_gpus=2 \
--batch_size=128 \
--train_epochs=90 \
--train_steps=10 \
--use_synthetic_data=false
```
See [`common.py`](common.py) for the full list of options.
### Using multiple GPUs
You can train these models on multiple GPUs using `tf.distribute.Strategy` API.
You can read more about them in this
[guide](https://www.tensorflow.org/guide/distribute_strategy).
In this example, we have made it easy to use with just the command line flag
`--num_gpus`. By default, this flag is 1 if TensorFlow was compiled with CUDA,
and 0 otherwise.
- `--num_gpus=0`: Uses `tf.distribute.OneDeviceStrategy` with CPU as the
  device.
- `--num_gpus=1`: Uses `tf.distribute.OneDeviceStrategy` with GPU as the
  device.
- `--num_gpus=2+`: Uses `tf.distribute.MirroredStrategy` to run synchronous
  distributed training across the GPUs.
If you wish to run without `tf.distribute.Strategy`, you can do so by setting
`--distribution_strategy=off`.
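For example, to train on a single device with the strategy machinery disabled:
```bash
python3 resnet_imagenet_main.py --distribution_strategy=off
```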
### Running on multiple GPU hosts
You can also train these models on multiple hosts, each with GPUs, using
`tf.distribute.Strategy`.
The easiest way to run multi-host benchmarks is to set the
[`TF_CONFIG`](https://www.tensorflow.org/guide/distributed_training#TF_CONFIG)
appropriately on each host. For example, to run using `MultiWorkerMirroredStrategy` on
2 hosts, the `cluster` in `TF_CONFIG` should have 2 `host:port` entries, and
host `i` should have the `task` in `TF_CONFIG` set to `{"type": "worker",
"index": i}`. `MultiWorkerMirroredStrategy` will automatically use all the
available GPUs at each host.
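For example, host 0 of a 2-host cluster might set (hostnames and ports below
are placeholders):
```bash
export TF_CONFIG='{
  "cluster": {"worker": ["host0:2222", "host1:2222"]},
  "task": {"type": "worker", "index": 0}
}'
```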
### Running on Cloud TPUs
Note: This model will **not** work with TPUs on Colab.
You can train the ResNet CTL model on Cloud TPUs using
`tf.distribute.TPUStrategy`. If you are not familiar with Cloud TPUs, it is
strongly recommended that you go through the
[quickstart](https://cloud.google.com/tpu/docs/quickstart) to learn how to
create a TPU and GCE VM.
To run ResNet model on a TPU, you must set `--distribution_strategy=tpu` and
`--tpu=$TPU_NAME`, where `$TPU_NAME` is the name of your TPU in the Cloud Console.
From a GCE VM, you can run the following command to train ResNet for one epoch
on a v2-8 or v3-8 TPU by setting `TRAIN_EPOCHS` to 1:
```bash
python3 resnet_ctl_imagenet_main.py \
--tpu=$TPU_NAME \
--model_dir=$MODEL_DIR \
--data_dir=$DATA_DIR \
--batch_size=1024 \
--steps_per_loop=500 \
--train_epochs=$TRAIN_EPOCHS \
--use_synthetic_data=false \
--dtype=fp32 \
--enable_eager=true \
--enable_tensorboard=true \
--distribution_strategy=tpu \
--log_steps=50 \
--single_l2_loss_op=true \
--use_tf_function=true
```
To train the ResNet to convergence, run it for 90 epochs by setting
`TRAIN_EPOCHS` to 90.
Note: `$MODEL_DIR` and `$DATA_DIR` must be GCS paths.