Commit c57e975a authored by saberkun's avatar saberkun
Browse files

Merge pull request #10338 from srihari-humbarwadi:readme

PiperOrigin-RevId: 413033276
parents 7fb4f3cd acf4156e
# Training configuration for EfficientNet-b0 trained on ImageNet on GPUs.
# Takes ~32 minutes per epoch for 8 V100s.
# Reaches ~76.1% within 350 epochs.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
distribution_strategy: 'mirrored'
num_gpus: 1
train_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'records'
split: 'train'
num_classes: 1000
num_examples: 1281167
batch_size: 32
use_per_replica_batch_size: true
dtype: 'float32'
augmenter:
name: 'autoaugment'
validation_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'records'
split: 'validation'
num_classes: 1000
num_examples: 50000
batch_size: 32
use_per_replica_batch_size: true
dtype: 'float32'
model:
model_params:
model_name: 'efficientnet-b0'
overrides:
num_classes: 1000
batch_norm: 'default'
dtype: 'float32'
activation: 'swish'
optimizer:
name: 'rmsprop'
momentum: 0.9
decay: 0.9
moving_average_decay: 0.0
lookahead: false
learning_rate:
name: 'exponential'
loss:
label_smoothing: 0.1
train:
resume_checkpoint: true
epochs: 500
evaluation:
epochs_between_evals: 1
# Training configuration for EfficientNet-b0 trained on ImageNet on TPUs.
# Takes ~2 minutes, 50 seconds per epoch for v3-32.
# Reaches ~76.1% within 350 epochs.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
distribution_strategy: 'tpu'
train_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'records'
split: 'train'
num_classes: 1000
num_examples: 1281167
batch_size: 128
use_per_replica_batch_size: true
dtype: 'bfloat16'
augmenter:
name: 'autoaugment'
validation_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'records'
split: 'validation'
num_classes: 1000
num_examples: 50000
batch_size: 128
use_per_replica_batch_size: true
dtype: 'bfloat16'
model:
model_params:
model_name: 'efficientnet-b0'
overrides:
num_classes: 1000
batch_norm: 'tpu'
dtype: 'bfloat16'
activation: 'swish'
optimizer:
name: 'rmsprop'
momentum: 0.9
decay: 0.9
moving_average_decay: 0.0
lookahead: false
learning_rate:
name: 'exponential'
loss:
label_smoothing: 0.1
train:
resume_checkpoint: true
epochs: 500
set_epoch_loop: true
evaluation:
epochs_between_evals: 1
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
distribution_strategy: 'mirrored'
num_gpus: 1
train_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'records'
split: 'train'
num_classes: 1000
num_examples: 1281167
batch_size: 32
use_per_replica_batch_size: true
dtype: 'float32'
validation_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'records'
split: 'validation'
num_classes: 1000
num_examples: 50000
batch_size: 32
use_per_replica_batch_size: true
dtype: 'float32'
model:
model_params:
model_name: 'efficientnet-b1'
overrides:
num_classes: 1000
batch_norm: 'default'
dtype: 'float32'
activation: 'swish'
optimizer:
name: 'rmsprop'
momentum: 0.9
decay: 0.9
moving_average_decay: 0.0
lookahead: false
learning_rate:
name: 'exponential'
loss:
label_smoothing: 0.1
train:
resume_checkpoint: true
epochs: 500
evaluation:
epochs_between_evals: 1
# Training configuration for EfficientNet-b1 trained on ImageNet on TPUs.
# Takes ~3 minutes, 15 seconds per epoch for v3-32.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
distribution_strategy: 'tpu'
train_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'records'
split: 'train'
num_classes: 1000
num_examples: 1281167
batch_size: 128
use_per_replica_batch_size: true
dtype: 'bfloat16'
augmenter:
name: 'autoaugment'
validation_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'records'
split: 'validation'
num_classes: 1000
num_examples: 50000
batch_size: 128
use_per_replica_batch_size: true
dtype: 'bfloat16'
model:
model_params:
model_name: 'efficientnet-b1'
overrides:
num_classes: 1000
batch_norm: 'tpu'
dtype: 'bfloat16'
activation: 'swish'
optimizer:
name: 'rmsprop'
momentum: 0.9
decay: 0.9
moving_average_decay: 0.0
lookahead: false
learning_rate:
name: 'exponential'
loss:
label_smoothing: 0.1
train:
resume_checkpoint: true
epochs: 500
set_epoch_loop: true
evaluation:
epochs_between_evals: 1
# Training configuration for ResNet trained on ImageNet on GPUs.
# Reaches > 76.1% within 90 epochs.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
distribution_strategy: 'mirrored'
num_gpus: 1
batchnorm_spatial_persistent: true
train_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'tfds'
split: 'train'
image_size: 224
num_classes: 1000
num_examples: 1281167
batch_size: 256
use_per_replica_batch_size: true
dtype: 'float16'
mean_subtract: true
standardize: true
validation_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'tfds'
split: 'validation'
image_size: 224
num_classes: 1000
num_examples: 50000
batch_size: 256
use_per_replica_batch_size: true
dtype: 'float16'
mean_subtract: true
standardize: true
model:
name: 'resnet'
model_params:
rescale_inputs: false
optimizer:
name: 'momentum'
momentum: 0.9
decay: 0.9
epsilon: 0.001
loss:
label_smoothing: 0.1
train:
resume_checkpoint: true
epochs: 90
evaluation:
epochs_between_evals: 1
# Training configuration for ResNet trained on ImageNet on TPUs.
# Takes ~4 minutes, 30 seconds per epoch for a v3-32.
# Reaches > 76.1% within 90 epochs.
# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
runtime:
distribution_strategy: 'tpu'
train_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'tfds'
split: 'train'
one_hot: false
image_size: 224
num_classes: 1000
num_examples: 1281167
batch_size: 128
use_per_replica_batch_size: true
mean_subtract: false
standardize: false
dtype: 'bfloat16'
validation_dataset:
name: 'imagenet2012'
data_dir: null
builder: 'tfds'
split: 'validation'
one_hot: false
image_size: 224
num_classes: 1000
num_examples: 50000
batch_size: 128
use_per_replica_batch_size: true
mean_subtract: false
standardize: false
dtype: 'bfloat16'
model:
name: 'resnet'
model_params:
rescale_inputs: true
optimizer:
name: 'momentum'
momentum: 0.9
decay: 0.9
epsilon: 0.001
moving_average_decay: 0.
lookahead: false
loss:
label_smoothing: 0.1
train:
callbacks:
enable_checkpoint_and_export: true
resume_checkpoint: true
epochs: 90
set_epoch_loop: true
evaluation:
epochs_between_evals: 1
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Dataset utilities for vision tasks using TFDS and tf.data.Dataset."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import dataclasses
import os
from typing import Any, List, Mapping, Optional, Tuple, Union
from absl import logging
import tensorflow as tf
import tensorflow_datasets as tfds
from official.legacy.image_classification import augment
from official.legacy.image_classification import preprocessing
from official.modeling.hyperparams import base_config
# Registry of supported augmenters, keyed by the string used in
# `AugmentConfig.name`. `AugmentConfig.build()` looks the name up here and
# instantiates the mapped class.
AUGMENTERS = {
    'autoaugment': augment.AutoAugment,
    'randaugment': augment.RandAugment,
}
@dataclasses.dataclass
class AugmentConfig(base_config.Config):
  """Settings describing which image augmenter to construct.

  Attributes:
    name: Key selecting the augmentation policy. Registered keys are
      'autoaugment' and 'randaugment'; None (the default) selects no
      augmentation.
    params: Optional keyword arguments forwarded to the augmenter constructor.
  """
  name: Optional[str] = None
  params: Optional[Mapping[str, Any]] = None

  def build(self) -> augment.ImageAugment:
    """Instantiate the augmenter selected by this config.

    Returns:
      The augmenter registered under `name` in `AUGMENTERS`, constructed with
      `params` as keyword arguments, or None when `name` is not registered
      (including when it is None).
    """
    augmenter_cls = AUGMENTERS.get(self.name)
    if augmenter_cls is None:
      return None
    init_kwargs = self.params or {}
    return augmenter_cls(**init_kwargs)
@dataclasses.dataclass
class DatasetConfig(base_config.Config):
  """The base configuration for building datasets.

  Attributes:
    name: The name of the Dataset. Usually should correspond to a TFDS dataset.
    data_dir: The path where the dataset files are stored, if available.
    filenames: Optional list of strings representing the TFRecord names.
    builder: The builder type used to load the dataset. Value should be one of
      'tfds' (load using TFDS), 'records' (load from TFRecords), or 'synthetic'
      (generate dummy synthetic data without reading from files).
    split: The split of the dataset. Usually 'train', 'validation', or 'test'.
    image_size: The size of the image in the dataset. This assumes that `width`
      == `height`. Set to 'infer' to infer the image size from TFDS info. This
      requires `name` to be a registered dataset in TFDS.
    num_classes: The number of classes given by the dataset. Set to 'infer' to
      infer the number of classes from TFDS info. This requires `name` to be a
      registered dataset in TFDS.
    num_channels: The number of channels given by the dataset. Set to 'infer'
      to infer the number of channels from TFDS info. This requires `name` to
      be a registered dataset in TFDS.
    num_examples: The number of examples given by the dataset. Set to 'infer'
      to infer the number of examples from TFDS info. This requires `name` to
      be a registered dataset in TFDS.
    batch_size: The base batch size for the dataset.
    use_per_replica_batch_size: Whether to scale the batch size based on
      available resources. If set to `True`, the dataset builder will return
      batch_size multiplied by `num_devices`, the number of device replicas
      (e.g., the number of GPUs or TPU cores). This setting should be `True` if
      the strategy argument is passed to `build()` and `num_devices > 1`.
    num_devices: The number of replica devices to use. This should be set by
      `strategy.num_replicas_in_sync` when using a distribution strategy.
    dtype: The desired dtype of the dataset. This will be set during
      preprocessing.
    one_hot: Whether to apply one hot encoding. Set to `True` to be able to use
      label smoothing.
    augmenter: The augmenter config to use. No augmentation is used by default.
    download: Whether to download data using TFDS.
    shuffle_buffer_size: The buffer size used for shuffling training data.
    file_shuffle_buffer_size: The buffer size used for shuffling raw training
      files.
    skip_decoding: Whether to skip image decoding when loading from TFDS.
    cache: Whether to cache the dataset examples. Can be used to avoid
      re-reading from disk on the second epoch. Requires significant memory
      overhead.
    tf_data_service: The URI of a tf.data service to offload preprocessing onto
      during training. The URI should be in the format "protocol://address",
      e.g. "grpc://tf-data-service:5050".
    mean_subtract: Whether or not to apply mean subtraction to the dataset.
    standardize: Whether or not to apply standardization to the dataset.
  """
  name: Optional[str] = None
  data_dir: Optional[str] = None
  filenames: Optional[List[str]] = None
  builder: str = 'tfds'
  split: str = 'train'
  image_size: Union[int, str] = 'infer'
  num_classes: Union[int, str] = 'infer'
  num_channels: Union[int, str] = 'infer'
  num_examples: Union[int, str] = 'infer'
  batch_size: int = 128
  use_per_replica_batch_size: bool = True
  num_devices: int = 1
  dtype: str = 'float32'
  one_hot: bool = True
  # A dataclass instance is unhashable, and Python 3.11+ rejects unhashable
  # values as dataclass defaults. A factory also gives every config its own
  # AugmentConfig instead of one instance shared through the class attribute.
  augmenter: AugmentConfig = dataclasses.field(default_factory=AugmentConfig)
  download: bool = False
  shuffle_buffer_size: int = 10000
  file_shuffle_buffer_size: int = 1024
  skip_decoding: bool = True
  cache: bool = False
  tf_data_service: Optional[str] = None
  mean_subtract: bool = False
  standardize: bool = False

  @property
  def has_data(self) -> bool:
    """Whether this dataset has any data associated with it.

    Returns:
      True if at least one of `name`, `data_dir`, or `filenames` is set to a
      non-empty value.
    """
    return bool(self.name or self.data_dir or self.filenames)
@dataclasses.dataclass
class ImageNetConfig(DatasetConfig):
  """The base ImageNet dataset config.

  Overrides the `DatasetConfig` defaults with fixed values for the
  'imagenet2012' dataset, so none of the size fields are left as 'infer'.
  """
  name: str = 'imagenet2012'
  # Note: for large datasets like ImageNet, using records is faster than tfds
  builder: str = 'records'
  image_size: int = 224
  num_channels: int = 3
  # Default example count; the inherited default `split` is 'train'.
  num_examples: int = 1281167
  num_classes: int = 1000
  batch_size: int = 128
@dataclasses.dataclass
class Cifar10Config(DatasetConfig):
  """The base CIFAR-10 dataset config.

  Overrides the `DatasetConfig` defaults for the 'cifar10' dataset. Fields not
  listed here (e.g. `builder`, `num_classes`) keep the inherited defaults.
  """
  name: str = 'cifar10'
  # NOTE(review): 224 matches the ImageNet config rather than CIFAR's native
  # resolution; presumably images are resized during preprocessing -- confirm.
  image_size: int = 224
  batch_size: int = 128
  # Unlike the base config, download via TFDS and cache examples by default.
  download: bool = True
  cache: bool = True
class DatasetBuilder:
  """An object for building datasets.

  Allows building various pipelines fetching examples, preprocessing, etc.
  Maintains additional state information calculated from the dataset, i.e.,
  training set split, batch size, and number of steps (batches).
  """

  def __init__(self, config: DatasetConfig, **overrides: Any):
    """Initialize the builder from the config.

    Args:
      config: The dataset configuration to build from.
      **overrides: Config fields to replace on a copy of `config`.
    """
    self.config = config.replace(**overrides)
    self.builder_info = None
    # Populated by `_build()`. Initialized here so that `pipeline()` and
    # `load_tfds()` do not raise AttributeError when called directly.
    self.input_context = None

    if self.config.augmenter is not None:
      logging.info('Using augmentation: %s', self.config.augmenter.name)
      self.augmenter = self.config.augmenter.build()
    else:
      self.augmenter = None

  @property
  def is_training(self) -> bool:
    """Whether this is the training set."""
    return self.config.split == 'train'

  @property
  def batch_size(self) -> int:
    """The batch size, multiplied by the number of replicas (if configured)."""
    if self.config.use_per_replica_batch_size:
      return self.config.batch_size * self.config.num_devices
    else:
      return self.config.batch_size

  @property
  def global_batch_size(self):
    """The global batch size across all replicas."""
    return self.batch_size

  @property
  def local_batch_size(self):
    """The base unscaled batch size."""
    if self.config.use_per_replica_batch_size:
      return self.config.batch_size
    else:
      return self.config.batch_size // self.config.num_devices

  @property
  def num_steps(self) -> int:
    """The number of steps (batches) to exhaust this dataset."""
    # Always divide by the global batch size to get the correct # of steps
    return self.num_examples // self.global_batch_size

  @property
  def dtype(self) -> tf.dtypes.DType:
    """Converts the config's dtype string to a tf dtype.

    Returns:
      The `tf.dtypes.DType` corresponding to the config's dtype string.

    Raises:
      ValueError if the config's dtype is not supported.
    """
    dtype_map = {
        'float32': tf.float32,
        'bfloat16': tf.bfloat16,
        'float16': tf.float16,
        'fp32': tf.float32,
        'bf16': tf.bfloat16,
    }
    try:
      return dtype_map[self.config.dtype]
    except (KeyError, TypeError) as e:
      # KeyError: unknown name. TypeError: unhashable value. Anything else
      # (e.g. KeyboardInterrupt) must not be converted into a ValueError.
      raise ValueError('Invalid DType provided. Supported types: {}'.format(
          dtype_map.keys())) from e

  @property
  def image_size(self) -> int:
    """The size of each image (can be inferred from the dataset)."""
    if self.config.image_size == 'infer':
      return self.info.features['image'].shape[0]
    else:
      return int(self.config.image_size)

  @property
  def num_channels(self) -> int:
    """The number of image channels (can be inferred from the dataset)."""
    if self.config.num_channels == 'infer':
      return self.info.features['image'].shape[-1]
    else:
      return int(self.config.num_channels)

  @property
  def num_examples(self) -> int:
    """The number of examples (can be inferred from the dataset)."""
    if self.config.num_examples == 'infer':
      return self.info.splits[self.config.split].num_examples
    else:
      return int(self.config.num_examples)

  @property
  def num_classes(self) -> int:
    """The number of classes (can be inferred from the dataset)."""
    if self.config.num_classes == 'infer':
      return self.info.features['label'].num_classes
    else:
      return int(self.config.num_classes)

  @property
  def info(self) -> tfds.core.DatasetInfo:
    """The TFDS dataset info, if available.

    The info is fetched on first access and cached in `builder_info`.

    Raises:
      ConnectionError: re-raised after logging if TFDS cannot be reached.
    """
    try:
      if self.builder_info is None:
        self.builder_info = tfds.builder(self.config.name).info
    except ConnectionError:
      logging.error('Failed to use TFDS to load info. Please set dataset info '
                    '(image_size, num_channels, num_examples, num_classes) in '
                    'the dataset config.')
      raise
    return self.builder_info

  def build(
      self,
      strategy: Optional[tf.distribute.Strategy] = None) -> tf.data.Dataset:
    """Construct a dataset end-to-end and return it using an optional strategy.

    Args:
      strategy: a strategy that, if passed, will distribute the dataset
        according to that strategy. If passed and `num_devices > 1`,
        `use_per_replica_batch_size` must be set to `True`.

    Returns:
      A TensorFlow dataset outputting batched images and labels.
    """
    if strategy:
      if strategy.num_replicas_in_sync != self.config.num_devices:
        logging.warning(
            'Passed a strategy with %d devices, but expected '
            '%d devices.', strategy.num_replicas_in_sync,
            self.config.num_devices)
      dataset = strategy.distribute_datasets_from_function(self._build)
    else:
      dataset = self._build()

    return dataset

  def _build(
      self,
      input_context: Optional[tf.distribute.InputContext] = None
  ) -> tf.data.Dataset:
    """Construct a dataset end-to-end and return it.

    Args:
      input_context: An optional context provided by `tf.distribute` for
        cross-replica training.

    Returns:
      A TensorFlow dataset outputting batched images and labels.

    Raises:
      ValueError: if the configured builder type is unknown.
    """
    builders = {
        'tfds': self.load_tfds,
        'records': self.load_records,
        'synthetic': self.load_synthetic,
    }

    builder = builders.get(self.config.builder, None)

    if builder is None:
      raise ValueError('Unknown builder type {}'.format(self.config.builder))

    # The loaders and `pipeline()` read the context from the instance.
    self.input_context = input_context
    dataset = builder()
    dataset = self.pipeline(dataset)

    return dataset

  def load_tfds(self) -> tf.data.Dataset:
    """Return a dataset loading files from TFDS."""
    logging.info('Using TFDS to load data.')

    builder = tfds.builder(self.config.name, data_dir=self.config.data_dir)

    if self.config.download:
      builder.download_and_prepare()

    decoders = {}

    if self.config.skip_decoding:
      decoders['image'] = tfds.decode.SkipDecoding()

    read_config = tfds.ReadConfig(
        interleave_cycle_length=10,
        interleave_block_length=1,
        input_context=self.input_context)

    dataset = builder.as_dataset(
        split=self.config.split,
        as_supervised=True,
        shuffle_files=True,
        decoders=decoders,
        read_config=read_config)

    return dataset

  def load_records(self) -> tf.data.Dataset:
    """Return a dataset of TFRecord file names.

    The records themselves are read later, in `pipeline()`.

    Raises:
      ValueError: if neither `filenames` nor `data_dir` is configured.
    """
    logging.info('Using TFRecords to load data.')
    if self.config.filenames is None:
      if self.config.data_dir is None:
        raise ValueError('Dataset must specify a path for the data files.')

      file_pattern = os.path.join(self.config.data_dir,
                                  '{}*'.format(self.config.split))
      dataset = tf.data.Dataset.list_files(file_pattern, shuffle=False)
    else:
      dataset = tf.data.Dataset.from_tensor_slices(self.config.filenames)

    return dataset

  def load_synthetic(self) -> tf.data.Dataset:
    """Return a dataset generating dummy synthetic data."""
    logging.info('Generating a synthetic dataset.')

    def generate_data(_):
      image = tf.zeros([self.image_size, self.image_size, self.num_channels],
                       dtype=self.dtype)
      label = tf.zeros([1], dtype=tf.int32)
      return image, label

    dataset = tf.data.Dataset.range(1)
    dataset = dataset.repeat()
    dataset = dataset.map(
        generate_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return dataset

  def pipeline(self, dataset: tf.data.Dataset) -> tf.data.Dataset:
    """Build a pipeline fetching, shuffling, and preprocessing the dataset.

    Args:
      dataset: A `tf.data.Dataset` that loads raw files.

    Returns:
      A TensorFlow dataset outputting batched images and labels.

    Raises:
      ValueError: if running with more than one replica without
        `use_per_replica_batch_size`, or if `tf_data_service` is set but the
        installed TensorFlow does not provide `tf.data.experimental.service`.
    """
    # TFDS receives the input context through its ReadConfig (see load_tfds),
    # so only the other builders are sharded here.
    if (self.config.builder != 'tfds' and self.input_context and
        self.input_context.num_input_pipelines > 1):
      dataset = dataset.shard(self.input_context.num_input_pipelines,
                              self.input_context.input_pipeline_id)
      logging.info(
          'Sharding the dataset: input_pipeline_id=%d '
          'num_input_pipelines=%d', self.input_context.input_pipeline_id,
          self.input_context.num_input_pipelines)

    if self.is_training and self.config.builder == 'records':
      # Shuffle the input files. tf.data transformations return a new dataset
      # rather than mutating in place, so the result must be reassigned.
      dataset = dataset.shuffle(
          buffer_size=self.config.file_shuffle_buffer_size)

    if self.is_training and not self.config.cache:
      dataset = dataset.repeat()

    if self.config.builder == 'records':
      # Read the data from disk in parallel
      dataset = dataset.interleave(
          tf.data.TFRecordDataset,
          cycle_length=10,
          block_length=1,
          num_parallel_calls=tf.data.experimental.AUTOTUNE)

    if self.config.cache:
      dataset = dataset.cache()

    if self.is_training:
      dataset = dataset.shuffle(self.config.shuffle_buffer_size)
      dataset = dataset.repeat()

    # Parse, pre-process, and batch the data in parallel
    if self.config.builder == 'records':
      preprocess = self.parse_record
    else:
      preprocess = self.preprocess
    dataset = dataset.map(
        preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    if self.input_context and self.config.num_devices > 1:
      if not self.config.use_per_replica_batch_size:
        raise ValueError(
            'The builder does not support a global batch size with more than '
            'one replica. Got {} replicas. Please set a '
            '`per_replica_batch_size` and enable '
            '`use_per_replica_batch_size=True`.'.format(
                self.config.num_devices))

      # The batch size of the dataset will be multiplied by the number of
      # replicas automatically when strategy.distribute_datasets_from_function
      # is called, so we use local batch size here.
      dataset = dataset.batch(
          self.local_batch_size, drop_remainder=self.is_training)
    else:
      dataset = dataset.batch(
          self.global_batch_size, drop_remainder=self.is_training)

    # Prefetch overlaps in-feed with training
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    if self.config.tf_data_service:
      if not hasattr(tf.data.experimental, 'service'):
        raise ValueError('The tf_data_service flag requires Tensorflow version '
                         '>= 2.3.0, but the version is {}'.format(
                             tf.__version__))
      dataset = dataset.apply(
          tf.data.experimental.service.distribute(
              processing_mode='parallel_epochs',
              service=self.config.tf_data_service,
              job_name='resnet_train'))
      dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    return dataset

  def parse_record(self, record: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
    """Parse an ImageNet record from a serialized string Tensor.

    Args:
      record: A serialized `tf.train.Example` string tensor.

    Returns:
      The preprocessed `(image, label)` pair produced by `preprocess()`.
    """
    keys_to_features = {
        'image/encoded': tf.io.FixedLenFeature((), tf.string, ''),
        'image/format': tf.io.FixedLenFeature((), tf.string, 'jpeg'),
        'image/class/label': tf.io.FixedLenFeature([], tf.int64, -1),
        'image/class/text': tf.io.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/class/label': tf.io.VarLenFeature(dtype=tf.int64),
    }

    parsed = tf.io.parse_single_example(record, keys_to_features)

    label = tf.reshape(parsed['image/class/label'], shape=[1])

    # Subtract one so that labels are in [0, 1000)
    label -= 1

    image_bytes = tf.reshape(parsed['image/encoded'], shape=[])
    image, label = self.preprocess(image_bytes, label)

    return image, label

  def preprocess(self, image: tf.Tensor,
                 label: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
    """Apply image preprocessing and augmentation to the image and label.

    Training splits use `preprocess_for_train` (with the configured
    augmenter); every other split uses `preprocess_for_eval`.

    Args:
      image: The input image tensor.
      label: The label tensor for `image`.

    Returns:
      The preprocessed image and the int32 (optionally one-hot) label.
    """
    if self.is_training:
      image = preprocessing.preprocess_for_train(
          image,
          image_size=self.image_size,
          mean_subtract=self.config.mean_subtract,
          standardize=self.config.standardize,
          dtype=self.dtype,
          augmenter=self.augmenter)
    else:
      image = preprocessing.preprocess_for_eval(
          image,
          image_size=self.image_size,
          num_channels=self.num_channels,
          mean_subtract=self.config.mean_subtract,
          standardize=self.config.standardize,
          dtype=self.dtype)

    label = tf.cast(label, tf.int32)
    if self.config.one_hot:
      label = tf.one_hot(label, self.num_classes)
      label = tf.reshape(label, [self.num_classes])

    return image, label

  @classmethod
  def from_params(cls, *args, **kwargs):
    """Construct a dataset builder from a default config and any overrides."""
    config = DatasetConfig.from_args(*args, **kwargs)
    return cls(config)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common modeling utilities."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
from typing import Optional, Text
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from tensorflow.python.tpu import tpu_function
@tf.keras.utils.register_keras_serializable(package='Vision')
class TpuBatchNormalization(tf.keras.layers.BatchNormalization):
  """Batch normalization whose moments are averaged across TPU replicas."""

  def __init__(self, fused: Optional[bool] = False, **kwargs):
    """Creates the layer; only the non-fused implementation is accepted.

    Args:
      fused: Must be False. True and None are rejected.
      **kwargs: Forwarded to `tf.keras.layers.BatchNormalization`.

    Raises:
      ValueError: if `fused` is True or None.
    """
    if fused in (True, None):
      raise ValueError('TpuBatchNormalization does not support fused=True.')
    super().__init__(fused=fused, **kwargs)

  def _cross_replica_average(self, t: tf.Tensor, num_shards_per_group: int):
    """Averages `t` over groups of `num_shards_per_group` TPU replicas.

    Args:
      t: The per-replica tensor to average.
      num_shards_per_group: Number of consecutive replicas forming one group.

    Returns:
      The cross-replica sum of `t` divided by `num_shards_per_group`.

    Raises:
      ValueError: if the shard count is not a multiple of the group size.
    """
    total_shards = tpu_function.get_tpu_context().number_of_shards
    groups = None
    if num_shards_per_group > 1:
      if total_shards % num_shards_per_group != 0:
        raise ValueError(
            'num_shards: %d mod shards_per_group: %d, should be 0' %
            (total_shards, num_shards_per_group))
      # Group g holds the consecutive replica ids
      # [g * num_shards_per_group, (g + 1) * num_shards_per_group).
      groups = [
          list(range(start, start + num_shards_per_group))
          for start in range(0, total_shards, num_shards_per_group)
      ]
    replica_sum = tf1.tpu.cross_replica_sum(t, groups)
    return replica_sum / tf.cast(num_shards_per_group, t.dtype)

  def _moments(self, inputs: tf.Tensor, reduction_axes: int, keep_dims: int):
    """Computes mean and variance, overriding the parent's `_moments`.

    With more than 8 shards the per-shard moments are combined across a
    replica group; otherwise the per-shard moments are returned unchanged.
    """
    shard_mean, shard_variance = super()._moments(
        inputs, reduction_axes, keep_dims=keep_dims)

    total_shards = tpu_function.get_tpu_context().number_of_shards or 1
    # Skip cross_replica for 2x2 or smaller slices.
    group_size = 1 if total_shards <= 8 else max(8, total_shards // 8)
    if group_size <= 1:
      return (shard_mean, shard_variance)

    # Compute variance using: Var[X]= E[X^2] - E[X]^2.
    shard_mean_of_square = shard_variance + tf.math.square(shard_mean)
    group_mean = self._cross_replica_average(shard_mean, group_size)
    group_mean_of_square = self._cross_replica_average(
        shard_mean_of_square, group_size)
    group_variance = group_mean_of_square - tf.math.square(group_mean)
    return (group_mean, group_variance)
def get_batch_norm(batch_norm_type: Text) -> tf.keras.layers.BatchNormalization:
  """Selects the batch normalization layer class for the given type.

  Args:
    batch_norm_type: The batch normalization implementation to use. 'tpu'
      selects `TpuBatchNormalization`; any other value selects the stock Keras
      layer.

  Returns:
    The layer class itself (not an instance): `TpuBatchNormalization` or
    `tf.keras.layers.BatchNormalization`.
  """
  wants_tpu = batch_norm_type == 'tpu'
  layer_cls = (
      TpuBatchNormalization if wants_tpu else tf.keras.layers.BatchNormalization)
  return layer_cls  # pytype: disable=bad-return-type  # typed-keras
def count_params(model, trainable_only=True):
  """Counts a model's parameters.

  Args:
    model: The model to inspect. Its `trainable_weights` are summed when
      `trainable_only` is true; otherwise its `count_params()` is used.
    trainable_only: Whether to count only the trainable weights.

  Returns:
    The number of trainable parameters as an int, or the result of
    `model.count_params()` when `trainable_only` is false.
  """
  if trainable_only:
    per_weight_counts = [
        tf.keras.backend.count_params(weight)
        for weight in model.trainable_weights
    ]
    return int(np.sum(per_weight_counts))
  return model.count_params()
def load_weights(model: tf.keras.Model,
                 model_weights_path: Text,
                 weights_format: Text = 'saved_model'):
  """Populates `model` with weights stored at `model_weights_path`.

  Args:
    model: The model that receives the weights.
    model_weights_path: Location of the stored weights.
    weights_format: Storage format of the weights: 'saved_model', 'h5', or
      'checkpoint'. Any value other than 'saved_model' is handed to
      `model.load_weights`.
  """
  if weights_format != 'saved_model':
    model.load_weights(model_weights_path)
    return

  # A SavedModel is loaded as a full model first; only its weights are copied.
  source_model = tf.keras.models.load_model(model_weights_path)
  model.set_weights(source_model.get_weights())
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Configuration definitions for EfficientNet losses, learning rates, and optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dataclasses
from official.legacy.image_classification.configs import base_configs
from official.modeling.hyperparams import base_config
@dataclasses.dataclass
class EfficientNetModelConfig(base_configs.ModelConfig):
  """Configuration for the EfficientNet model.

  This configuration will default to settings used for training efficientnet-b0
  on a v3-8 TPU on ImageNet.

  Attributes:
    name: The name of the model. Defaults to 'EfficientNet'.
    num_classes: The number of classes in the model.
    model_params: A dictionary that represents the parameters of the
      EfficientNet model. These will be passed in to the "from_name" function.
    loss: The configuration for loss. Defaults to a categorical cross entropy
      implementation.
    optimizer: The configuration for optimizations. Defaults to an RMSProp
      configuration.
    learning_rate: The configuration for learning rate. Defaults to an
      exponential configuration.
  """
  name: str = 'EfficientNet'
  num_classes: int = 1000
  model_params: base_config.Config = dataclasses.field(
      default_factory=lambda: {
          'model_name': 'efficientnet-b0',
          'model_weights_path': '',
          'weights_format': 'saved_model',
          'overrides': {
              'batch_norm': 'default',
              'rescale_input': True,
              'num_classes': 1000,
              'activation': 'swish',
              'dtype': 'float32',
          }
      })
  # The defaults below are config objects. Python 3.11+ rejects unhashable
  # instances as dataclass defaults, and a shared instance would leak
  # mutations between configs, so each is built by a factory, like
  # `model_params` above.
  loss: base_configs.LossConfig = dataclasses.field(
      default_factory=lambda: base_configs.LossConfig(  # pylint: disable=g-long-lambda
          name='categorical_crossentropy', label_smoothing=0.1))
  optimizer: base_configs.OptimizerConfig = dataclasses.field(
      default_factory=lambda: base_configs.OptimizerConfig(  # pylint: disable=g-long-lambda
          name='rmsprop',
          decay=0.9,
          epsilon=0.001,
          momentum=0.9,
          moving_average_decay=None))
  learning_rate: base_configs.LearningRateConfig = dataclasses.field(
      default_factory=lambda: base_configs.LearningRateConfig(  # pylint: disable=g-long-lambda
          name='exponential',
          initial_lr=0.008,
          decay_epochs=2.4,
          decay_rate=0.97,
          warmup_epochs=5,
          scale_by_batch_size=1. / 128.,
          staircase=True))
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains definitions for EfficientNet model.
[1] Mingxing Tan, Quoc V. Le
EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks.
ICML'19, https://arxiv.org/abs/1905.11946
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dataclasses
import math
from typing import Any, Dict, Optional, Text, Tuple
from absl import logging
import tensorflow as tf
from official.legacy.image_classification import preprocessing
from official.legacy.image_classification.efficientnet import common_modules
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
@dataclasses.dataclass
class BlockConfig(base_config.Config):
  """Config for a single MB Conv Block.

  The first seven fields are declared in the positional order used by the
  `BlockConfig.from_args(...)` calls that populate `ModelConfig.blocks`:
  (input_filters, output_filters, kernel_size, num_repeat, expand_ratio,
  strides, se_ratio).
  """
  # Channels entering / leaving the block. `efficientnet()` passes both
  # through `round_filters` before the block is built.
  input_filters: int = 0
  output_filters: int = 0
  # Kernel size of the depthwise (or fused) convolution.
  kernel_size: int = 3
  # How many times the block is stacked; `efficientnet()` scales it with
  # `round_repeats`.
  num_repeat: int = 1
  # Expanded width is `input_filters * expand_ratio`; the expansion conv is
  # skipped when this is 1.
  expand_ratio: int = 1
  # Strides of the depthwise (or fused) convolution. Repeated blocks after the
  # first in a stack are built with strides [1, 1].
  strides: Tuple[int, int] = (1, 1)
  # Squeeze-and-excitation ratio; must lie in (0, 1] when the model enables SE.
  se_ratio: Optional[float] = None
  # Allow the residual connection (still requires unit strides and matching
  # input/output filters).
  id_skip: bool = True
  # Replace expansion + depthwise conv with one regular convolution.
  fused_conv: bool = False
  # 'no_depthwise' drops the depthwise conv; any other value keeps it.
  conv_type: str = 'depthwise'
@dataclasses.dataclass
class ModelConfig(base_config.Config):
  """Default Config for Efficientnet-B0.

  The first four fields (width_coefficient, depth_coefficient, resolution,
  dropout_rate) are the ones set positionally by the `ModelConfig.from_args`
  calls in `MODEL_CONFIGS`.
  """
  # Multiplier applied to filter counts by `round_filters`.
  width_coefficient: float = 1.0
  # Multiplier applied to block repeat counts by `round_repeats`.
  depth_coefficient: float = 1.0
  # NOTE(review): not read by the graph-building functions visible here;
  # presumably the nominal input image size -- confirm against the data
  # pipeline.
  resolution: int = 224
  # Dropout rate applied after global pooling, before the logits layer.
  dropout_rate: float = 0.2
  blocks: Tuple[BlockConfig, ...] = (
      # (input_filters, output_filters, kernel_size, num_repeat,
      #  expand_ratio, strides, se_ratio)
      # pylint: disable=bad-whitespace
      BlockConfig.from_args(32, 16, 3, 1, 1, (1, 1), 0.25),
      BlockConfig.from_args(16, 24, 3, 2, 6, (2, 2), 0.25),
      BlockConfig.from_args(24, 40, 5, 2, 6, (2, 2), 0.25),
      BlockConfig.from_args(40, 80, 3, 3, 6, (2, 2), 0.25),
      BlockConfig.from_args(80, 112, 5, 3, 6, (1, 1), 0.25),
      BlockConfig.from_args(112, 192, 5, 4, 6, (2, 2), 0.25),
      BlockConfig.from_args(192, 320, 3, 1, 6, (1, 1), 0.25),
      # pylint: enable=bad-whitespace
  )
  # Base filter counts of the stem and top convolutions, before width scaling.
  stem_base_filters: int = 32
  top_base_filters: int = 1280
  # Activation name, resolved through `tf_utils.get_activation`.
  activation: str = 'simple_swish'
  # Batch norm variant name, resolved through `common_modules.get_batch_norm`.
  batch_norm: str = 'default'
  bn_momentum: float = 0.99
  bn_epsilon: float = 1e-3
  # While the original implementation used a weight decay of 1e-5,
  # tf.nn.l2_loss divides it by 2, so we halve this to compensate in Keras
  weight_decay: float = 5e-6
  # Base drop-connect rate; `efficientnet()` scales it linearly with the block
  # index, so earlier blocks get a smaller rate.
  drop_connect_rate: float = 0.2
  # Filter counts are rounded to a multiple of this value.
  depth_divisor: int = 8
  # Lower bound for rounded filter counts; `depth_divisor` is used when unset.
  min_depth: Optional[int] = None
  # Whether MB conv blocks include the squeeze-and-excitation phase.
  use_se: bool = True
  input_channels: int = 3
  num_classes: int = 1000
  # Used as the Keras model name by the `EfficientNet` wrapper.
  model_name: str = 'efficientnet'
  # When true, inputs go through `preprocessing.normalize_images` first.
  rescale_input: bool = True
  # NOTE(review): the graph-building functions visible here read
  # `tf.keras.backend.image_data_format()` instead of this field -- confirm
  # whether it is used elsewhere.
  data_format: str = 'channels_last'
  # dtype passed to `preprocessing.normalize_images`.
  dtype: str = 'float32'
# Scaling settings of the predefined variants, one row per model:
# (name, width_coefficient, depth_coefficient, resolution, dropout_rate).
_MODEL_SCALING = (
    ('efficientnet-b0', 1.0, 1.0, 224, 0.2),
    ('efficientnet-b1', 1.0, 1.1, 240, 0.2),
    ('efficientnet-b2', 1.1, 1.2, 260, 0.3),
    ('efficientnet-b3', 1.2, 1.4, 300, 0.3),
    ('efficientnet-b4', 1.4, 1.8, 380, 0.4),
    ('efficientnet-b5', 1.6, 2.2, 456, 0.4),
    ('efficientnet-b6', 1.8, 2.6, 528, 0.5),
    ('efficientnet-b7', 2.0, 3.1, 600, 0.5),
    ('efficientnet-b8', 2.2, 3.6, 672, 0.5),
    ('efficientnet-l2', 4.3, 5.3, 800, 0.5),
)

# Maps each predefined model name to its ModelConfig; every other field keeps
# the ModelConfig default.
MODEL_CONFIGS = {
    name: ModelConfig.from_args(width, depth, resolution, dropout)
    for name, width, depth, resolution, dropout in _MODEL_SCALING
}
# Serialized Keras initializer used for the convolution kernels built by
# `conv2d_block`.
CONV_KERNEL_INITIALIZER = dict(
    class_name='VarianceScaling',
    config=dict(
        scale=2.0,
        mode='fan_out',
        # Note: this is a truncated normal distribution
        distribution='normal',
    ),
)
# Serialized Keras initializer used for the kernel of the final dense
# (`logits`) layer.
DENSE_KERNEL_INITIALIZER = dict(
    class_name='VarianceScaling',
    config=dict(
        scale=1 / 3.0,
        mode='fan_out',
        distribution='uniform',
    ),
)
def round_filters(filters: int, config: ModelConfig) -> int:
  """Scale a filter count by the width coefficient and snap it to the divisor.

  Args:
    filters: the base number of filters.
    config: model config supplying `width_coefficient`, `depth_divisor` and
      `min_depth`.

  Returns:
    `filters` unchanged when the width coefficient is falsy (0 or None).
    Otherwise the scaled count rounded to the nearest multiple of
    `depth_divisor`, clamped from below by `min_depth` (or by the divisor when
    `min_depth` is unset) and bumped up by one divisor if rounding removed
    more than 10% of the scaled value.
  """
  divisor = config.depth_divisor
  lower_bound = config.min_depth
  multiplier = config.width_coefficient
  if not multiplier:
    return filters

  lower_bound = lower_bound or divisor
  scaled = filters * multiplier
  # Nearest multiple of `divisor`; exact ties go up.
  snapped = int(scaled + divisor / 2) // divisor * divisor
  rounded = max(lower_bound, snapped)
  # Make sure that round down does not go down by more than 10%.
  if rounded < 0.9 * scaled:
    rounded += divisor
  logging.info('round_filter input=%s output=%s', filters, rounded)
  return int(rounded)
def round_repeats(repeats: int, depth_coefficient: float) -> int:
  """Scale a block repeat count by the depth coefficient, rounding up.

  Args:
    repeats: the base number of repeats of a block.
    depth_coefficient: the depth multiplier of the model.

  Returns:
    `ceil(depth_coefficient * repeats)` as an int.
  """
  scaled_repeats = depth_coefficient * repeats
  return int(math.ceil(scaled_repeats))
def conv2d_block(inputs: tf.Tensor,
                 conv_filters: Optional[int],
                 config: ModelConfig,
                 kernel_size: Any = (1, 1),
                 strides: Any = (1, 1),
                 use_batch_norm: bool = True,
                 use_bias: bool = False,
                 activation: Optional[Any] = None,
                 depthwise: bool = False,
                 name: Optional[Text] = None):
  """Applies a convolution, then optional batch norm and activation.

  Args:
    inputs: the input tensor.
    conv_filters: number of output filters; only used when `depthwise` is
      False.
    config: model config supplying the batch norm variant, its momentum and
      epsilon, and the weight decay.
    kernel_size: kernel size of the convolution.
    strides: strides of the convolution.
    use_batch_norm: whether to add a batch norm layer after the convolution.
    use_bias: whether the convolution has a bias term.
    activation: activation applied last; skipped when None.
    depthwise: build a `DepthwiseConv2D` instead of a `Conv2D`.
    name: prefix for the layer names (`_conv2d`, `_bn`, `_activation` are
      appended).

  Returns:
    the output tensor of the last layer applied.
  """
  layer_prefix = name or ''
  batch_norm_cls = common_modules.get_batch_norm(config.batch_norm)
  channels_first = tf.keras.backend.image_data_format() == 'channels_first'
  l2_factor = config.weight_decay

  # Arguments shared by both kinds of convolution.
  conv_kwargs = dict(
      kernel_size=kernel_size,
      strides=strides,
      use_bias=use_bias,
      padding='same',
      name=layer_prefix + '_conv2d',
      kernel_regularizer=tf.keras.regularizers.l2(l2_factor),
      bias_regularizer=tf.keras.regularizers.l2(l2_factor),
  )

  if depthwise:
    # NOTE(review): DepthwiseConv2D documents `depthwise_regularizer` for its
    # kernel; confirm that the `kernel_regularizer` passed above actually
    # regularizes the depthwise kernel.
    conv_cls = tf.keras.layers.DepthwiseConv2D
    conv_kwargs['depthwise_initializer'] = CONV_KERNEL_INITIALIZER
  else:
    conv_cls = tf.keras.layers.Conv2D
    conv_kwargs['filters'] = conv_filters
    conv_kwargs['kernel_initializer'] = CONV_KERNEL_INITIALIZER

  x = conv_cls(**conv_kwargs)(inputs)

  if use_batch_norm:
    # Normalize over the channel axis of the active data format.
    x = batch_norm_cls(
        axis=1 if channels_first else -1,
        momentum=config.bn_momentum,
        epsilon=config.bn_epsilon,
        name=layer_prefix + '_bn')(x)

  if activation is not None:
    x = tf.keras.layers.Activation(
        activation, name=layer_prefix + '_activation')(x)

  return x
def mb_conv_block(inputs: tf.Tensor,
                  block: BlockConfig,
                  config: ModelConfig,
                  prefix: Optional[Text] = None):
  """Mobile Inverted Residual Bottleneck.

  Builds, in order: an expansion convolution (or one fused convolution), an
  optional depthwise convolution, an optional squeeze-and-excitation gate, a
  projection convolution without activation, and an optional residual
  connection back to `inputs`.

  Args:
    inputs: the Keras input to the block
    block: BlockConfig, arguments to create a Block
    config: ModelConfig, a set of model parameters
    prefix: prefix for naming all layers

  Returns:
    the output of the block
  """
  use_se = config.use_se
  activation = tf_utils.get_activation(config.activation)
  drop_connect_rate = config.drop_connect_rate
  data_format = tf.keras.backend.image_data_format()
  # Any conv_type other than 'no_depthwise' keeps the depthwise convolution.
  use_depthwise = block.conv_type != 'no_depthwise'
  prefix = prefix or ''

  # Width of the expanded representation.
  filters = block.input_filters * block.expand_ratio

  x = inputs

  if block.fused_conv:
    # If we use fused mbconv, skip expansion and use regular conv.
    x = conv2d_block(
        x,
        filters,
        config,
        kernel_size=block.kernel_size,
        strides=block.strides,
        activation=activation,
        name=prefix + 'fused')
  else:
    if block.expand_ratio != 1:
      # Expansion phase
      # Without a depthwise conv, the expansion itself uses a 3x3 kernel.
      kernel_size = (1, 1) if use_depthwise else (3, 3)
      x = conv2d_block(
          x,
          filters,
          config,
          kernel_size=kernel_size,
          activation=activation,
          name=prefix + 'expand')

    # Depthwise Convolution
    if use_depthwise:
      x = conv2d_block(
          x,
          conv_filters=None,
          config=config,
          kernel_size=block.kernel_size,
          strides=block.strides,
          activation=activation,
          depthwise=True,
          name=prefix + 'depthwise')

  # Squeeze and Excitation phase
  if use_se:
    assert block.se_ratio is not None
    assert 0 < block.se_ratio <= 1
    # The squeeze width is derived from the block's input filters, not from
    # the expanded width.
    num_reduced_filters = max(1, int(block.input_filters * block.se_ratio))

    # Shape that turns the pooled vector back into a 1x1 spatial map so the
    # convolutions below can be applied to it.
    if data_format == 'channels_first':
      se_shape = (filters, 1, 1)
    else:
      se_shape = (1, 1, filters)

    se = tf.keras.layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)
    se = tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')(se)

    se = conv2d_block(
        se,
        num_reduced_filters,
        config,
        use_bias=True,
        use_batch_norm=False,
        activation=activation,
        name=prefix + 'se_reduce')
    se = conv2d_block(
        se,
        filters,
        config,
        use_bias=True,
        use_batch_norm=False,
        activation='sigmoid',
        name=prefix + 'se_expand')
    # Gate the features with the sigmoid output of the SE branch.
    x = tf.keras.layers.multiply([x, se], name=prefix + 'se_excite')

  # Output phase
  x = conv2d_block(
      x, block.output_filters, config, activation=None, name=prefix + 'project')

  # Add identity so that quantization-aware training can insert quantization
  # ops correctly.
  x = tf.keras.layers.Activation(
      tf_utils.get_activation('identity'), name=prefix + 'id')(
          x)

  # The residual connection is only added when the block keeps both the
  # spatial size (unit strides) and the channel count.
  if (block.id_skip and all(s == 1 for s in block.strides) and
      block.input_filters == block.output_filters):
    if drop_connect_rate and drop_connect_rate > 0:
      # Apply dropconnect
      # The only difference between dropout and dropconnect in TF is scaling by
      # drop_connect_rate during training. See:
      # https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
      x = tf.keras.layers.Dropout(
          drop_connect_rate, noise_shape=(None, 1, 1, 1), name=prefix + 'drop')(
              x)

    x = tf.keras.layers.add([x, inputs], name=prefix + 'add')

  return x
def efficientnet(image_input: tf.keras.layers.Input, config: ModelConfig):  # pytype: disable=invalid-annotation  # typed-keras
  """Creates an EfficientNet graph given the model parameters.

  This function is wrapped by the `EfficientNet` class to make a tf.keras.Model.

  The graph is: optional layout permute and input normalization, a stem
  convolution, the stacks of MB conv blocks described by `config.blocks`, a
  top convolution, global average pooling, optional dropout, a dense `logits`
  layer and a softmax `probs` layer.

  Args:
    image_input: the input batch of images
    config: the model config

  Returns:
    the output of efficientnet, i.e. the output of the softmax `probs` layer
  """
  depth_coefficient = config.depth_coefficient
  blocks = config.blocks
  stem_base_filters = config.stem_base_filters
  top_base_filters = config.top_base_filters
  activation = tf_utils.get_activation(config.activation)
  dropout_rate = config.dropout_rate
  # Base rate; `config` is rebound per block below with a scaled copy, while
  # this local keeps the original value.
  drop_connect_rate = config.drop_connect_rate
  num_classes = config.num_classes
  input_channels = config.input_channels
  rescale_input = config.rescale_input
  data_format = tf.keras.backend.image_data_format()
  dtype = config.dtype
  weight_decay = config.weight_decay

  x = image_input
  if data_format == 'channels_first':
    # Happens on GPU/TPU if available.
    x = tf.keras.layers.Permute((3, 1, 2))(x)
  if rescale_input:
    x = preprocessing.normalize_images(
        x, num_channels=input_channels, dtype=dtype, data_format=data_format)

  # Build stem
  x = conv2d_block(
      x,
      round_filters(stem_base_filters, config),
      config,
      kernel_size=[3, 3],
      strides=[2, 2],
      activation=activation,
      name='stem')

  # Build blocks
  # Total number of MB conv blocks after depth scaling; used to scale the
  # drop-connect rate linearly with the block index.
  num_blocks_total = sum(
      round_repeats(block.num_repeat, depth_coefficient) for block in blocks)
  block_num = 0

  for stack_idx, block in enumerate(blocks):
    assert block.num_repeat > 0
    # Scale the block's filters by the width coefficient and its repeat count
    # by the depth coefficient.
    block = block.replace(
        input_filters=round_filters(block.input_filters, config),
        output_filters=round_filters(block.output_filters, config),
        num_repeat=round_repeats(block.num_repeat, depth_coefficient))

    # The first block needs to take care of stride and filter size increase
    drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
    config = config.replace(drop_connect_rate=drop_rate)
    block_prefix = 'stack_{}/block_0/'.format(stack_idx)
    x = mb_conv_block(x, block, config, block_prefix)
    block_num += 1
    if block.num_repeat > 1:
      # Remaining blocks of the stack keep the channel count and use unit
      # strides.
      block = block.replace(input_filters=block.output_filters, strides=[1, 1])

      for block_idx in range(block.num_repeat - 1):
        drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
        config = config.replace(drop_connect_rate=drop_rate)
        block_prefix = 'stack_{}/block_{}/'.format(stack_idx, block_idx + 1)
        x = mb_conv_block(x, block, config, prefix=block_prefix)
        block_num += 1

  # Build top
  x = conv2d_block(
      x,
      round_filters(top_base_filters, config),
      config,
      activation=activation,
      name='top')

  # Build classifier
  x = tf.keras.layers.GlobalAveragePooling2D(name='top_pool')(x)
  if dropout_rate and dropout_rate > 0:
    x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
  x = tf.keras.layers.Dense(
      num_classes,
      kernel_initializer=DENSE_KERNEL_INITIALIZER,
      kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
      bias_regularizer=tf.keras.regularizers.l2(weight_decay),
      name='logits')(
          x)
  x = tf.keras.layers.Activation('softmax', name='probs')(x)

  return x
class EfficientNet(tf.keras.Model):
  """Keras model wrapper around the functional `efficientnet` graph.

  Keeps the resolved `ModelConfig` on the instance as `config` and offers
  `from_name` to build one of the predefined variants.
  """

  def __init__(self,
               config: Optional[ModelConfig] = None,
               overrides: Optional[Dict[Text, Any]] = None):
    """Create an EfficientNet model.

    Args:
      config: (optional) the main model parameters to create the model;
        a default `ModelConfig` is used when omitted
      overrides: (optional) a dict containing keys that can override config
    """
    self.config = (config or ModelConfig()).replace(**(overrides or {}))

    # Spatial dimensions are left open so any image size is accepted.
    image_input = tf.keras.layers.Input(
        shape=(None, None, self.config.input_channels))

    # Cast to float32 in case we have a different model dtype
    output = tf.cast(efficientnet(image_input, self.config), tf.float32)

    model_name = self.config.model_name
    logging.info('Building model %s with params %s', model_name, self.config)

    super().__init__(inputs=image_input, outputs=output, name=model_name)

  @classmethod
  def from_name(cls,
                model_name: Text,
                model_weights_path: Optional[Text] = None,
                weights_format: Text = 'saved_model',
                overrides: Optional[Dict[Text, Any]] = None):
    """Construct an EfficientNet model from a predefined model name.

    E.g., `EfficientNet.from_name('efficientnet-b0')`.

    Args:
      model_name: the predefined model name
      model_weights_path: the path to the weights (h5 file or saved model dir)
      weights_format: the model weights format. One of 'saved_model', 'h5', or
        'checkpoint'.
      overrides: (optional) a dict containing keys that can override config.
        A 'model_config' entry, if present, is removed from the overrides and
        merged into the table of known model configs.

    Returns:
      A constructed EfficientNet instance.

    Raises:
      ValueError: if `model_name` is not a known model name.
    """
    # Work on a copy so the caller's dict is not modified by the pop below.
    overrides = dict(overrides) if overrides else {}

    # One can define their own custom models if necessary
    known_configs = dict(MODEL_CONFIGS)
    known_configs.update(overrides.pop('model_config', {}))

    if model_name not in known_configs:
      raise ValueError('Unknown model name {}'.format(model_name))

    model = cls(config=known_configs[model_name], overrides=overrides)

    if model_weights_path:
      common_modules.load_weights(
          model, model_weights_path, weights_format=weights_format)

    return model
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A script to export TF-Hub SavedModel."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import os
from absl import app
from absl import flags
import tensorflow as tf
from official.legacy.image_classification.efficientnet import efficientnet_model
FLAGS = flags.FLAGS

# Command-line inputs forwarded by main() to export_tfhub(). None of them has
# a usable default (all default to None).
flags.DEFINE_string("model_name", None, "EfficientNet model name.")
flags.DEFINE_string("model_path", None, "File path to TF model checkpoint.")
flags.DEFINE_string("export_path", None,
                    "TF-Hub SavedModel destination path to export.")
def export_tfhub(model_path, hub_destination, model_name):
  """Restores EfficientNet weights from a checkpoint and exports SavedModels.

  Two models sharing the same input are written under `hub_destination`:
  `classification` (full network output) and `feature-vector` (output of the
  `top_pool` layer).

  Args:
    model_path: checkpoint path passed to `tf.train.Checkpoint.restore`.
    hub_destination: directory that receives the two SavedModel
      sub-directories.
    model_name: key into `efficientnet_model.MODEL_CONFIGS`.
  """
  config = dict(efficientnet_model.MODEL_CONFIGS)[model_name]

  image_input = tf.keras.layers.Input(
      shape=(None, None, 3), name="image_input", dtype=tf.float32)
  # NOTE(review): inputs are multiplied by 255 before entering the network;
  # presumably exported models take images in [0, 1] -- confirm.
  outputs = efficientnet_model.efficientnet(image_input * 255.0, config)
  classifier = tf.keras.Model(image_input, outputs)

  # Restore the weights and fail if any object of the model is left unmatched.
  checkpoint = tf.train.Checkpoint(model=classifier)
  checkpoint.restore(model_path).assert_existing_objects_matched()

  classification_dir = os.path.join(hub_destination, "classification")
  classifier.save(classification_dir, include_optimizer=False)

  # Second export: same input, truncated at the global pooling layer.
  pooled_features = classifier.get_layer(name="top_pool").get_output_at(0)
  feature_extractor = tf.keras.Model(image_input, pooled_features)
  feature_vector_dir = os.path.join(hub_destination, "feature-vector")
  feature_extractor.save(feature_vector_dir, include_optimizer=False)
def main(argv):
  """Entry point for `app.run`: rejects extra arguments and runs the export.

  Args:
    argv: command-line arguments left after flag parsing; only the program
      name is accepted.

  Raises:
    app.UsageError: if any positional argument is present.
  """
  unexpected_args = argv[1:]
  if unexpected_args:
    raise app.UsageError("Too many command-line arguments.")

  export_tfhub(
      model_path=FLAGS.model_path,
      hub_destination=FLAGS.export_path,
      model_name=FLAGS.model_name)
if __name__ == "__main__":
  # All three flags default to None. Mark them required so a missing flag is
  # reported as a usage error instead of a KeyError/TypeError raised from
  # inside export_tfhub().
  flags.mark_flags_as_required(["model_name", "model_path", "export_path"])
  app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Learning rate utilities for vision tasks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from typing import Any, Mapping, Optional
import numpy as np
import tensorflow as tf
# Reference learning rate for a batch size of 256; CosineDecayWithWarmup
# scales it linearly with the actual batch size.
BASE_LEARNING_RATE = 0.1
class WarmupDecaySchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """A wrapper for LearningRateSchedule that includes warmup steps.

  For steps below `warmup_steps` the learning rate ramps linearly from 0
  towards the warmup target; from `warmup_steps` onwards the value of the
  wrapped schedule is returned unchanged.
  """

  def __init__(self,
               lr_schedule: tf.keras.optimizers.schedules.LearningRateSchedule,
               warmup_steps: int,
               warmup_lr: Optional[float] = None):
    """Add warmup decay to a learning rate schedule.

    Args:
      lr_schedule: base learning rate scheduler
      warmup_steps: number of warmup steps
      warmup_lr: an optional field for the final warmup learning rate. This
        should be provided if the base `lr_schedule` does not contain this
        field.
    """
    super(WarmupDecaySchedule, self).__init__()
    self._lr_schedule = lr_schedule
    self._warmup_steps = warmup_steps
    self._warmup_lr = warmup_lr

  def __call__(self, step: int):
    """Returns the learning rate for `step`.

    Args:
      step: the current training step.

    Returns:
      The warmup value while `step < warmup_steps`, otherwise the wrapped
      schedule's value. With `warmup_steps` of 0/None the wrapped schedule is
      used directly.
    """
    lr = self._lr_schedule(step)
    if self._warmup_steps:
      # Warmup target: the explicit `warmup_lr` if given, else the wrapped
      # schedule's `initial_learning_rate` attribute.
      if self._warmup_lr is not None:
        initial_learning_rate = tf.convert_to_tensor(
            self._warmup_lr, name="initial_learning_rate")
      else:
        initial_learning_rate = tf.convert_to_tensor(
            self._lr_schedule.initial_learning_rate,
            name="initial_learning_rate")
      dtype = initial_learning_rate.dtype
      global_step_recomp = tf.cast(step, dtype)
      warmup_steps = tf.cast(self._warmup_steps, dtype)
      warmup_lr = initial_learning_rate * global_step_recomp / warmup_steps
      lr = tf.cond(global_step_recomp < warmup_steps, lambda: warmup_lr,
                   lambda: lr)
    return lr

  def get_config(self) -> Mapping[str, Any]:
    """Returns the wrapped schedule's config extended with the warmup fields."""
    # Copy first so the dict returned by the wrapped schedule is never mutated
    # (and so a read-only mapping is accepted as well).
    config = dict(self._lr_schedule.get_config())
    config.update({
        "warmup_steps": self._warmup_steps,
        "warmup_lr": self._warmup_lr,
    })
    return config
class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Cosine-decay learning rate schedule preceded by a linear warmup.

  The peak learning rate is `BASE_LEARNING_RATE * batch_size / 256`.
  """

  def __init__(self, batch_size: int, total_steps: int, warmup_steps: int):
    """Creates the cosine learning rate tensor with linear warmup.

    Args:
      batch_size: The training batch size used in the experiment.
      total_steps: Total training steps.
      warmup_steps: Steps for the warm up period.
    """
    super(CosineDecayWithWarmup, self).__init__()
    base_lr_batch_size = 256
    # Kept only so get_config() can return the constructor arguments.
    self._batch_size = batch_size
    self._total_steps = total_steps
    self._init_learning_rate = BASE_LEARNING_RATE * batch_size / base_lr_batch_size
    self._warmup_steps = warmup_steps

  def __call__(self, global_step: int):
    """Returns the learning rate for `global_step`.

    Args:
      global_step: the current training step.

    Returns:
      A linear ramp from 0 to the peak rate while
      `global_step < warmup_steps`, then a half-cosine from the peak rate
      down to 0 at `total_steps`.
    """
    global_step = tf.cast(global_step, dtype=tf.float32)
    warmup_steps = self._warmup_steps
    init_lr = self._init_learning_rate
    total_steps = self._total_steps

    linear_warmup = global_step / warmup_steps * init_lr

    cosine_learning_rate = init_lr * (tf.cos(np.pi *
                                             (global_step - warmup_steps) /
                                             (total_steps - warmup_steps)) +
                                      1.0) / 2.0

    learning_rate = tf.where(global_step < warmup_steps, linear_warmup,
                             cosine_learning_rate)
    return learning_rate

  def get_config(self):
    """Returns the constructor arguments of this schedule.

    The previous implementation read `self._warmup_learning_rate`, which is
    never assigned, so every call raised AttributeError. Returning exactly the
    `__init__` arguments also lets the default `from_config` (`cls(**config)`)
    rebuild the schedule.
    """
    return {
        "batch_size": self._batch_size,
        "total_steps": self._total_steps,
        "warmup_steps": self._warmup_steps,
    }
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment