Commit 1c32ebf2 authored by Fan Yang, committed by A. Unique TensorFlower

Internal change.

PiperOrigin-RevId: 421362994
parent ada0e36b
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Configuration definitions for EfficientNet losses, learning rates, and optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from typing import Any, Mapping
import dataclasses
from official.modeling.hyperparams import base_config
from official.vision.image_classification.configs import base_configs
@dataclasses.dataclass
class EfficientNetModelConfig(base_configs.ModelConfig):
"""Configuration for the EfficientNet model.
This configuration will default to settings used for training efficientnet-b0
on a v3-8 TPU on ImageNet.
Attributes:
name: The name of the model. Defaults to 'EfficientNet'.
num_classes: The number of classes in the model.
model_params: A dictionary that represents the parameters of the
EfficientNet model. These will be passed in to the "from_name" function.
loss: The configuration for loss. Defaults to a categorical cross entropy
implementation.
optimizer: The configuration for the optimizer. Defaults to an RMSProp
configuration.
learning_rate: The configuration for learning rate. Defaults to an
exponential configuration.
"""
name: str = 'EfficientNet'
num_classes: int = 1000
model_params: base_config.Config = dataclasses.field(
default_factory=lambda: {
'model_name': 'efficientnet-b0',
'model_weights_path': '',
'weights_format': 'saved_model',
'overrides': {
'batch_norm': 'default',
'rescale_input': True,
'num_classes': 1000,
'activation': 'swish',
'dtype': 'float32',
}
})
loss: base_configs.LossConfig = base_configs.LossConfig(
name='categorical_crossentropy', label_smoothing=0.1)
optimizer: base_configs.OptimizerConfig = base_configs.OptimizerConfig(
name='rmsprop',
decay=0.9,
epsilon=0.001,
momentum=0.9,
moving_average_decay=None)
learning_rate: base_configs.LearningRateConfig = base_configs.LearningRateConfig( # pylint: disable=line-too-long
name='exponential',
initial_lr=0.008,
decay_epochs=2.4,
decay_rate=0.97,
warmup_epochs=5,
scale_by_batch_size=1. / 128.,
staircase=True)
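# Illustrative usage (a hypothetical sketch, not part of this module): the
# config instantiates with the defaults above, and individual fields can be
# overridden via `replace`, which `base_config.Config` provides:
#
#   config = EfficientNetModelConfig()
#   config = config.replace(num_classes=10)
#   assert config.optimizer.name == 'rmsprop'
#   assert config.learning_rate.initial_lr == 0.008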
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains definitions for EfficientNet model.
[1] Mingxing Tan, Quoc V. Le
EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks.
ICML'19, https://arxiv.org/abs/1905.11946
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import os
from typing import Any, Dict, Optional, Text, Tuple
from absl import logging
from dataclasses import dataclass
import tensorflow as tf
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
from official.vision.image_classification import preprocessing
from official.vision.image_classification.efficientnet import common_modules
@dataclass
class BlockConfig(base_config.Config):
"""Config for a single MB Conv Block."""
input_filters: int = 0
output_filters: int = 0
kernel_size: int = 3
num_repeat: int = 1
expand_ratio: int = 1
strides: Tuple[int, int] = (1, 1)
se_ratio: Optional[float] = None
id_skip: bool = True
fused_conv: bool = False
conv_type: str = 'depthwise'
@dataclass
class ModelConfig(base_config.Config):
"""Default Config for Efficientnet-B0."""
width_coefficient: float = 1.0
depth_coefficient: float = 1.0
resolution: int = 224
dropout_rate: float = 0.2
blocks: Tuple[BlockConfig, ...] = (
# (input_filters, output_filters, kernel_size, num_repeat,
# expand_ratio, strides, se_ratio)
# pylint: disable=bad-whitespace
BlockConfig.from_args(32, 16, 3, 1, 1, (1, 1), 0.25),
BlockConfig.from_args(16, 24, 3, 2, 6, (2, 2), 0.25),
BlockConfig.from_args(24, 40, 5, 2, 6, (2, 2), 0.25),
BlockConfig.from_args(40, 80, 3, 3, 6, (2, 2), 0.25),
BlockConfig.from_args(80, 112, 5, 3, 6, (1, 1), 0.25),
BlockConfig.from_args(112, 192, 5, 4, 6, (2, 2), 0.25),
BlockConfig.from_args(192, 320, 3, 1, 6, (1, 1), 0.25),
# pylint: enable=bad-whitespace
)
stem_base_filters: int = 32
top_base_filters: int = 1280
activation: str = 'simple_swish'
batch_norm: str = 'default'
bn_momentum: float = 0.99
bn_epsilon: float = 1e-3
# The original implementation used weight_decay=1e-5 with tf.nn.l2_loss, which
# includes a factor of 1/2; the Keras l2 regularizer does not, so we halve the
# value here to match.
weight_decay: float = 5e-6
drop_connect_rate: float = 0.2
depth_divisor: int = 8
min_depth: Optional[int] = None
use_se: bool = True
input_channels: int = 3
num_classes: int = 1000
model_name: str = 'efficientnet'
rescale_input: bool = True
data_format: str = 'channels_last'
dtype: str = 'float32'
MODEL_CONFIGS = {
# (width, depth, resolution, dropout)
'efficientnet-b0': ModelConfig.from_args(1.0, 1.0, 224, 0.2),
'efficientnet-b1': ModelConfig.from_args(1.0, 1.1, 240, 0.2),
'efficientnet-b2': ModelConfig.from_args(1.1, 1.2, 260, 0.3),
'efficientnet-b3': ModelConfig.from_args(1.2, 1.4, 300, 0.3),
'efficientnet-b4': ModelConfig.from_args(1.4, 1.8, 380, 0.4),
'efficientnet-b5': ModelConfig.from_args(1.6, 2.2, 456, 0.4),
'efficientnet-b6': ModelConfig.from_args(1.8, 2.6, 528, 0.5),
'efficientnet-b7': ModelConfig.from_args(2.0, 3.1, 600, 0.5),
'efficientnet-b8': ModelConfig.from_args(2.2, 3.6, 672, 0.5),
'efficientnet-l2': ModelConfig.from_args(4.3, 5.3, 800, 0.5),
}
CONV_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 2.0,
'mode': 'fan_out',
# Note: this is a truncated normal distribution
'distribution': 'normal'
}
}
DENSE_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 1 / 3.0,
'mode': 'fan_out',
'distribution': 'uniform'
}
}
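# For reference (illustrative only): these dicts use the standard Keras
# initializer-config format, so Keras can deserialize them anywhere a
# `kernel_initializer` argument is accepted, e.g.:
#
#   init = tf.keras.initializers.get(CONV_KERNEL_INITIALIZER)
#   kernel = init(shape=(3, 3, 32, 64))  # a 3x3 kernel mapping 32 -> 64 channels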
def round_filters(filters: int, config: ModelConfig) -> int:
"""Round number of filters based on width coefficient."""
width_coefficient = config.width_coefficient
min_depth = config.min_depth
divisor = config.depth_divisor
orig_filters = filters
if not width_coefficient:
return filters
filters *= width_coefficient
min_depth = min_depth or divisor
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
# Make sure that rounding down does not reduce filters by more than 10%.
if new_filters < 0.9 * filters:
new_filters += divisor
logging.info('round_filter input=%s output=%s', orig_filters, new_filters)
return int(new_filters)
def round_repeats(repeats: int, depth_coefficient: float) -> int:
"""Round number of repeats based on depth coefficient."""
return int(math.ceil(depth_coefficient * repeats))
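# Worked example of the two helpers above (hypothetical numbers): for
# efficientnet-b2 (width_coefficient=1.1, depth_divisor=8),
#
#   config = MODEL_CONFIGS['efficientnet-b2']
#   round_filters(32, config)   # 32 * 1.1 = 35.2 -> snaps to 32, and is kept
#                               # because 32 >= 0.9 * 35.2
#   round_repeats(2, config.depth_coefficient)  # ceil(2 * 1.2) = 3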
def conv2d_block(inputs: tf.Tensor,
conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Optional[Any] = None,
depthwise: bool = False,
name: Optional[Text] = None):
"""A conv2d followed by batch norm and an activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm)
bn_momentum = config.bn_momentum
bn_epsilon = config.bn_epsilon
data_format = tf.keras.backend.image_data_format()
weight_decay = config.weight_decay
name = name or ''
# Collect args based on what kind of conv2d block is desired
init_kwargs = {
'kernel_size': kernel_size,
'strides': strides,
'use_bias': use_bias,
'padding': 'same',
'name': name + '_conv2d',
'kernel_regularizer': tf.keras.regularizers.l2(weight_decay),
'bias_regularizer': tf.keras.regularizers.l2(weight_decay),
}
if depthwise:
conv2d = tf.keras.layers.DepthwiseConv2D
init_kwargs.update({'depthwise_initializer': CONV_KERNEL_INITIALIZER})
else:
conv2d = tf.keras.layers.Conv2D
init_kwargs.update({
'filters': conv_filters,
'kernel_initializer': CONV_KERNEL_INITIALIZER
})
x = conv2d(**init_kwargs)(inputs)
if use_batch_norm:
bn_axis = 1 if data_format == 'channels_first' else -1
x = batch_norm(
axis=bn_axis,
momentum=bn_momentum,
epsilon=bn_epsilon,
name=name + '_bn')(
x)
if activation is not None:
x = tf.keras.layers.Activation(activation, name=name + '_activation')(x)
return x
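# Minimal sketch of the helper on a dummy input (illustrative only):
#
#   images = tf.keras.layers.Input(shape=(224, 224, 3))
#   y = conv2d_block(images, conv_filters=32, config=ModelConfig(),
#                    kernel_size=(3, 3), strides=(2, 2), activation='relu',
#                    name='example')
#   # y: (None, 112, 112, 32) after conv -> batch norm -> activation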
def mb_conv_block(inputs: tf.Tensor,
block: BlockConfig,
config: ModelConfig,
prefix: Optional[Text] = None):
"""Mobile Inverted Residual Bottleneck.
Args:
inputs: the Keras input to the block
block: BlockConfig, arguments to create a Block
config: ModelConfig, a set of model parameters
prefix: prefix for naming all layers
Returns:
the output of the block
"""
use_se = config.use_se
activation = tf_utils.get_activation(config.activation)
drop_connect_rate = config.drop_connect_rate
data_format = tf.keras.backend.image_data_format()
use_depthwise = block.conv_type != 'no_depthwise'
prefix = prefix or ''
filters = block.input_filters * block.expand_ratio
x = inputs
if block.fused_conv:
# If we use fused mbconv, skip expansion and use regular conv.
x = conv2d_block(
x,
filters,
config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
name=prefix + 'fused')
else:
if block.expand_ratio != 1:
# Expansion phase
kernel_size = (1, 1) if use_depthwise else (3, 3)
x = conv2d_block(
x,
filters,
config,
kernel_size=kernel_size,
activation=activation,
name=prefix + 'expand')
# Depthwise Convolution
if use_depthwise:
x = conv2d_block(
x,
conv_filters=None,
config=config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
depthwise=True,
name=prefix + 'depthwise')
# Squeeze and Excitation phase
if use_se:
assert block.se_ratio is not None
assert 0 < block.se_ratio <= 1
num_reduced_filters = max(1, int(block.input_filters * block.se_ratio))
if data_format == 'channels_first':
se_shape = (filters, 1, 1)
else:
se_shape = (1, 1, filters)
se = tf.keras.layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)
se = tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')(se)
se = conv2d_block(
se,
num_reduced_filters,
config,
use_bias=True,
use_batch_norm=False,
activation=activation,
name=prefix + 'se_reduce')
se = conv2d_block(
se,
filters,
config,
use_bias=True,
use_batch_norm=False,
activation='sigmoid',
name=prefix + 'se_expand')
x = tf.keras.layers.multiply([x, se], name=prefix + 'se_excite')
# Output phase
x = conv2d_block(
x, block.output_filters, config, activation=None, name=prefix + 'project')
# Add identity so that quantization-aware training can insert quantization
# ops correctly.
x = tf.keras.layers.Activation(
tf_utils.get_activation('identity'), name=prefix + 'id')(
x)
if (block.id_skip and all(s == 1 for s in block.strides) and
block.input_filters == block.output_filters):
if drop_connect_rate and drop_connect_rate > 0:
# Apply dropconnect
# The only difference between dropout and dropconnect in TF is scaling by
# drop_connect_rate during training. See:
# https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
x = tf.keras.layers.Dropout(
drop_connect_rate, noise_shape=(None, 1, 1, 1), name=prefix + 'drop')(
x)
x = tf.keras.layers.add([x, inputs], name=prefix + 'add')
return x
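# Illustrative sketch: applying the first B0 block from the table above to a
# stem-sized tensor (hypothetical values):
#
#   inputs = tf.keras.layers.Input(shape=(112, 112, 32))
#   block = BlockConfig.from_args(32, 16, 3, 1, 1, (1, 1), 0.25)
#   outputs = mb_conv_block(inputs, block, ModelConfig(), prefix='example/')
#   # expand_ratio == 1, so the expansion conv is skipped; the depthwise conv,
#   # squeeze-excite, and projection to 16 filters still run. No residual is
#   # added because input and output filter counts differ.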
def efficientnet(image_input: tf.keras.layers.Input, config: ModelConfig): # pytype: disable=invalid-annotation # typed-keras
"""Creates an EfficientNet graph given the model parameters.
This function is wrapped by the `EfficientNet` class to make a tf.keras.Model.
Args:
image_input: the input batch of images
config: the model config
Returns:
the output of efficientnet
"""
depth_coefficient = config.depth_coefficient
blocks = config.blocks
stem_base_filters = config.stem_base_filters
top_base_filters = config.top_base_filters
activation = tf_utils.get_activation(config.activation)
dropout_rate = config.dropout_rate
drop_connect_rate = config.drop_connect_rate
num_classes = config.num_classes
input_channels = config.input_channels
rescale_input = config.rescale_input
data_format = tf.keras.backend.image_data_format()
dtype = config.dtype
weight_decay = config.weight_decay
x = image_input
if data_format == 'channels_first':
# Happens on GPU/TPU if available.
x = tf.keras.layers.Permute((3, 1, 2))(x)
if rescale_input:
x = preprocessing.normalize_images(
x, num_channels=input_channels, dtype=dtype, data_format=data_format)
# Build stem
x = conv2d_block(
x,
round_filters(stem_base_filters, config),
config,
kernel_size=[3, 3],
strides=[2, 2],
activation=activation,
name='stem')
# Build blocks
num_blocks_total = sum(
round_repeats(block.num_repeat, depth_coefficient) for block in blocks)
block_num = 0
for stack_idx, block in enumerate(blocks):
assert block.num_repeat > 0
# Update block input and output filters based on depth multiplier
block = block.replace(
input_filters=round_filters(block.input_filters, config),
output_filters=round_filters(block.output_filters, config),
num_repeat=round_repeats(block.num_repeat, depth_coefficient))
# The first block needs to take care of stride and filter size increase
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_0/'.format(stack_idx)
x = mb_conv_block(x, block, config, block_prefix)
block_num += 1
if block.num_repeat > 1:
block = block.replace(input_filters=block.output_filters, strides=[1, 1])
for block_idx in range(block.num_repeat - 1):
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_{}/'.format(stack_idx, block_idx + 1)
x = mb_conv_block(x, block, config, prefix=block_prefix)
block_num += 1
# Build top
x = conv2d_block(
x,
round_filters(top_base_filters, config),
config,
activation=activation,
name='top')
# Build classifier
x = tf.keras.layers.GlobalAveragePooling2D(name='top_pool')(x)
if dropout_rate and dropout_rate > 0:
x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
x = tf.keras.layers.Dense(
num_classes,
kernel_initializer=DENSE_KERNEL_INITIALIZER,
kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
bias_regularizer=tf.keras.regularizers.l2(weight_decay),
name='logits')(
x)
x = tf.keras.layers.Activation('softmax', name='probs')(x)
return x
class EfficientNet(tf.keras.Model):
"""Wrapper class for an EfficientNet Keras model.
Contains helper methods to build, manage, and save metadata about the model.
"""
def __init__(self,
config: Optional[ModelConfig] = None,
overrides: Optional[Dict[Text, Any]] = None):
"""Create an EfficientNet model.
Args:
config: (optional) the main model parameters to create the model
overrides: (optional) a dict containing keys that can override config
"""
overrides = overrides or {}
config = config or ModelConfig()
self.config = config.replace(**overrides)
input_channels = self.config.input_channels
model_name = self.config.model_name
input_shape = (None, None, input_channels) # Should handle any size image
image_input = tf.keras.layers.Input(shape=input_shape)
output = efficientnet(image_input, self.config)
# Cast to float32 in case we have a different model dtype
output = tf.cast(output, tf.float32)
logging.info('Building model %s with params %s', model_name, self.config)
super(EfficientNet, self).__init__(
inputs=image_input, outputs=output, name=model_name)
@classmethod
def from_name(cls,
model_name: Text,
model_weights_path: Optional[Text] = None,
weights_format: Text = 'saved_model',
overrides: Optional[Dict[Text, Any]] = None):
"""Construct an EfficientNet model from a predefined model name.
E.g., `EfficientNet.from_name('efficientnet-b0')`.
Args:
model_name: the predefined model name
model_weights_path: the path to the weights (h5 file or saved model dir)
weights_format: the model weights format. One of 'saved_model', 'h5', or
'checkpoint'.
overrides: (optional) a dict containing keys that can override config
Returns:
A constructed EfficientNet instance.
"""
model_configs = dict(MODEL_CONFIGS)
overrides = dict(overrides) if overrides else {}
# Users can define their own custom models here if necessary
model_configs.update(overrides.pop('model_config', {}))
if model_name not in model_configs:
raise ValueError('Unknown model name {}'.format(model_name))
config = model_configs[model_name]
model = cls(config=config, overrides=overrides)
if model_weights_path:
common_modules.load_weights(
model, model_weights_path, weights_format=weights_format)
return model
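# Minimal usage sketch (the weights path below is a placeholder):
#
#   model = EfficientNet.from_name(
#       'efficientnet-b0', overrides={'num_classes': 10})
#   # or, to restore pretrained weights:
#   # model = EfficientNet.from_name(
#   #     'efficientnet-b0',
#   #     model_weights_path='/path/to/weights',
#   #     weights_format='saved_model')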
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A script to export TF-Hub SavedModel."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app
from absl import flags
import tensorflow as tf
from official.vision.image_classification.efficientnet import efficientnet_model
FLAGS = flags.FLAGS
flags.DEFINE_string("model_name", None, "EfficientNet model name.")
flags.DEFINE_string("model_path", None, "File path to TF model checkpoint.")
flags.DEFINE_string("export_path", None,
"TF-Hub SavedModel destination path to export.")
def export_tfhub(model_path, hub_destination, model_name):
"""Restores a tf.keras.Model and saves for TF-Hub."""
model_configs = dict(efficientnet_model.MODEL_CONFIGS)
config = model_configs[model_name]
image_input = tf.keras.layers.Input(
shape=(None, None, 3), name="image_input", dtype=tf.float32)
x = image_input * 255.0
outputs = efficientnet_model.efficientnet(x, config)
hub_model = tf.keras.Model(image_input, outputs)
ckpt = tf.train.Checkpoint(model=hub_model)
ckpt.restore(model_path).assert_existing_objects_matched()
hub_model.save(
os.path.join(hub_destination, "classification"), include_optimizer=False)
feature_vector_output = hub_model.get_layer(name="top_pool").get_output_at(0)
hub_model2 = tf.keras.Model(image_input, feature_vector_output)
hub_model2.save(
os.path.join(hub_destination, "feature-vector"), include_optimizer=False)
def main(argv):
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
export_tfhub(FLAGS.model_path, FLAGS.export_path, FLAGS.model_name)
if __name__ == "__main__":
app.run(main)
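# Example invocation (placeholder paths):
#
#   python3 export_tfhub.py \
#     --model_name=efficientnet-b0 \
#     --model_path=/tmp/efficientnet/ckpt \
#     --export_path=/tmp/efficientnet/hub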
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Learning rate utilities for vision tasks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from typing import Any, Mapping, Optional
import numpy as np
import tensorflow as tf
BASE_LEARNING_RATE = 0.1
class WarmupDecaySchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
"""A wrapper for LearningRateSchedule that includes warmup steps."""
def __init__(self,
lr_schedule: tf.keras.optimizers.schedules.LearningRateSchedule,
warmup_steps: int,
warmup_lr: Optional[float] = None):
"""Add warmup decay to a learning rate schedule.
Args:
lr_schedule: base learning rate scheduler
warmup_steps: number of warmup steps
warmup_lr: an optional final warmup learning rate. Provide this when the
base `lr_schedule` does not expose an `initial_learning_rate`
attribute.
"""
super(WarmupDecaySchedule, self).__init__()
self._lr_schedule = lr_schedule
self._warmup_steps = warmup_steps
self._warmup_lr = warmup_lr
def __call__(self, step: int):
lr = self._lr_schedule(step)
if self._warmup_steps:
if self._warmup_lr is not None:
initial_learning_rate = tf.convert_to_tensor(
self._warmup_lr, name="initial_learning_rate")
else:
initial_learning_rate = tf.convert_to_tensor(
self._lr_schedule.initial_learning_rate,
name="initial_learning_rate")
dtype = initial_learning_rate.dtype
global_step_recomp = tf.cast(step, dtype)
warmup_steps = tf.cast(self._warmup_steps, dtype)
warmup_lr = initial_learning_rate * global_step_recomp / warmup_steps
lr = tf.cond(global_step_recomp < warmup_steps, lambda: warmup_lr,
lambda: lr)
return lr
def get_config(self) -> Mapping[str, Any]:
config = self._lr_schedule.get_config()
config.update({
"warmup_steps": self._warmup_steps,
"warmup_lr": self._warmup_lr,
})
return config
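# Illustrative pairing (hypothetical values): wrap an exponential decay so the
# learning rate ramps linearly from 0 toward its initial value over the first
# 500 steps, then follows the wrapped schedule:
#
#   base = tf.keras.optimizers.schedules.ExponentialDecay(
#       initial_learning_rate=0.008, decay_steps=2400, decay_rate=0.97)
#   schedule = WarmupDecaySchedule(lr_schedule=base, warmup_steps=500)
#   optimizer = tf.keras.optimizers.RMSprop(learning_rate=schedule)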
class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Class to generate learning rate tensor."""
def __init__(self, batch_size: int, total_steps: int, warmup_steps: int):
"""Creates the consine learning rate tensor with linear warmup.
Args:
batch_size: The training batch size used in the experiment.
total_steps: Total training steps.
warmup_steps: Steps for the warm up period.
"""
super(CosineDecayWithWarmup, self).__init__()
base_lr_batch_size = 256
self._total_steps = total_steps
self._init_learning_rate = BASE_LEARNING_RATE * batch_size / base_lr_batch_size
self._warmup_steps = warmup_steps
def __call__(self, global_step: int):
global_step = tf.cast(global_step, dtype=tf.float32)
warmup_steps = self._warmup_steps
init_lr = self._init_learning_rate
total_steps = self._total_steps
linear_warmup = global_step / warmup_steps * init_lr
cosine_learning_rate = init_lr * (tf.cos(np.pi *
(global_step - warmup_steps) /
(total_steps - warmup_steps)) +
1.0) / 2.0
learning_rate = tf.where(global_step < warmup_steps, linear_warmup,
cosine_learning_rate)
return learning_rate
def get_config(self):
return {
"total_steps": self._total_steps,
"warmup_learning_rate": self._warmup_learning_rate,
"warmup_steps": self._warmup_steps,
"init_learning_rate": self._init_learning_rate,
}
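# Numeric sanity check (these values match the expectations in the unit test
# below): with batch_size=256 the initial rate equals BASE_LEARNING_RATE.
#
#   schedule = CosineDecayWithWarmup(batch_size=256, total_steps=3,
#                                    warmup_steps=1)
#   [float(schedule(s)) for s in range(4)]  # [0.0, 0.1, 0.05, 0.0]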
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for learning_rate."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.vision.image_classification import learning_rate
class LearningRateTests(tf.test.TestCase):
def test_warmup_decay(self):
"""Basic computational test for warmup decay."""
initial_lr = 0.01
decay_steps = 100
decay_rate = 0.01
warmup_steps = 10
base_lr = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate=initial_lr,
decay_steps=decay_steps,
decay_rate=decay_rate)
lr = learning_rate.WarmupDecaySchedule(
lr_schedule=base_lr, warmup_steps=warmup_steps)
for step in range(warmup_steps - 1):
config = lr.get_config()
self.assertEqual(config['warmup_steps'], warmup_steps)
self.assertAllClose(
self.evaluate(lr(step)), step / warmup_steps * initial_lr)
def test_cosine_decay_with_warmup(self):
"""Basic computational test for cosine decay with warmup."""
expected_lrs = [0.0, 0.1, 0.05, 0.0]
lr = learning_rate.CosineDecayWithWarmup(
batch_size=256, total_steps=3, warmup_steps=1)
for step in [0, 1, 2, 3]:
self.assertAllClose(lr(step), expected_lrs[step])
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runs a simple model on the MNIST dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
# Import libraries
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
import tensorflow_datasets as tfds
from official.common import distribute_utils
from official.utils.flags import core as flags_core
from official.utils.misc import model_helpers
from official.vision.image_classification.resnet import common
FLAGS = flags.FLAGS
def build_model():
"""Constructs the ML model used to predict handwritten digits."""
image = tf.keras.layers.Input(shape=(28, 28, 1))
y = tf.keras.layers.Conv2D(filters=32,
kernel_size=5,
padding='same',
activation='relu')(image)
y = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
strides=(2, 2),
padding='same')(y)
y = tf.keras.layers.Conv2D(filters=32,
kernel_size=5,
padding='same',
activation='relu')(y)
y = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
strides=(2, 2),
padding='same')(y)
y = tf.keras.layers.Flatten()(y)
y = tf.keras.layers.Dense(1024, activation='relu')(y)
y = tf.keras.layers.Dropout(0.4)(y)
probs = tf.keras.layers.Dense(10, activation='softmax')(y)
model = tf.keras.models.Model(image, probs, name='mnist')
return model
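# Quick sketch (illustrative): the model can be inspected before compiling.
#
#   model = build_model()
#   model.summary()  # two conv/pool stages feeding a 1024-unit dense head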
@tfds.decode.make_decoder(output_dtype=tf.float32)
def decode_image(example, feature):
"""Convert image to float32 and normalize from [0, 255] to [0.0, 1.0]."""
return tf.cast(feature.decode_example(example), dtype=tf.float32) / 255
def run(flags_obj, datasets_override=None, strategy_override=None):
"""Run MNIST model training and eval loop using native Keras APIs.
Args:
flags_obj: An object containing parsed flag values.
datasets_override: A pair of `tf.data.Dataset` objects to train the model,
representing the train and test sets.
strategy_override: A `tf.distribute.Strategy` object to use for the model.
Returns:
Dictionary of training and eval stats.
"""
# Start TF profiler server.
tf.profiler.experimental.server.start(flags_obj.profiler_port)
strategy = strategy_override or distribute_utils.get_distribution_strategy(
distribution_strategy=flags_obj.distribution_strategy,
num_gpus=flags_obj.num_gpus,
tpu_address=flags_obj.tpu)
strategy_scope = distribute_utils.get_strategy_scope(strategy)
mnist = tfds.builder('mnist', data_dir=flags_obj.data_dir)
if flags_obj.download:
mnist.download_and_prepare()
mnist_train, mnist_test = datasets_override or mnist.as_dataset(
split=['train', 'test'],
decoders={'image': decode_image()}, # pylint: disable=no-value-for-parameter
as_supervised=True)
train_input_dataset = mnist_train.cache().repeat().shuffle(
buffer_size=50000).batch(flags_obj.batch_size)
eval_input_dataset = mnist_test.cache().repeat().batch(flags_obj.batch_size)
with strategy_scope:
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
0.05, decay_steps=100000, decay_rate=0.96)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
model = build_model()
model.compile(
optimizer=optimizer,
loss='sparse_categorical_crossentropy',
metrics=['sparse_categorical_accuracy'])
num_train_examples = mnist.info.splits['train'].num_examples
train_steps = num_train_examples // flags_obj.batch_size
train_epochs = flags_obj.train_epochs
ckpt_full_path = os.path.join(flags_obj.model_dir, 'model.ckpt-{epoch:04d}')
callbacks = [
tf.keras.callbacks.ModelCheckpoint(
ckpt_full_path, save_weights_only=True),
tf.keras.callbacks.TensorBoard(log_dir=flags_obj.model_dir),
]
num_eval_examples = mnist.info.splits['test'].num_examples
num_eval_steps = num_eval_examples // flags_obj.batch_size
history = model.fit(
train_input_dataset,
epochs=train_epochs,
steps_per_epoch=train_steps,
callbacks=callbacks,
validation_steps=num_eval_steps,
validation_data=eval_input_dataset,
validation_freq=flags_obj.epochs_between_evals)
export_path = os.path.join(flags_obj.model_dir, 'saved_model')
model.save(export_path, include_optimizer=False)
eval_output = model.evaluate(
eval_input_dataset, steps=num_eval_steps, verbose=2)
stats = common.build_stats(history, eval_output, callbacks)
return stats
def define_mnist_flags():
"""Define command line flags for MNIST model."""
flags_core.define_base(
clean=True,
num_gpu=True,
train_epochs=True,
epochs_between_evals=True,
distribution_strategy=True)
flags_core.define_device()
flags_core.define_distribution()
flags.DEFINE_bool('download', True,
'Whether to download data to `--data_dir`.')
flags.DEFINE_integer('profiler_port', 9012,
'Port to start profiler server on.')
FLAGS.set_default('batch_size', 1024)
def main(_):
model_helpers.apply_clean(FLAGS)
stats = run(flags.FLAGS)
logging.info('Run stats:\n%s', stats)
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
define_mnist_flags()
app.run(main)
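# Typical invocation (placeholder directories; the flags are defined in
# `define_mnist_flags` above):
#
#   python3 mnist_main.py \
#     --model_dir=/tmp/mnist_model \
#     --data_dir=/tmp/mnist_data \
#     --train_epochs=10 \
#     --distribution_strategy=one_device \
#     --num_gpus=1 \
#     --download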
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test the Keras MNIST model on GPU."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
from absl.testing import parameterized
import tensorflow as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.utils.testing import integration
from official.vision.image_classification import mnist_main
mnist_main.define_mnist_flags()
def eager_strategy_combinations():
return combinations.combine(
distribution=[
strategy_combinations.default_strategy,
strategy_combinations.cloud_tpu_strategy,
strategy_combinations.one_device_strategy_gpu,
],)
class KerasMnistTest(tf.test.TestCase, parameterized.TestCase):
"""Unit tests for sample Keras MNIST model."""
_tempdir = None
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(KerasMnistTest, cls).setUpClass()
def tearDown(self):
super(KerasMnistTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
@combinations.generate(eager_strategy_combinations())
def test_end_to_end(self, distribution):
"""Test Keras MNIST model with `strategy`."""
extra_flags = [
"-train_epochs",
"1",
# Let TFDS find the metadata folder automatically
"--data_dir="
]
dummy_data = (
tf.ones(shape=(10, 28, 28, 1), dtype=tf.int32),
tf.range(10),
)
datasets = (
tf.data.Dataset.from_tensor_slices(dummy_data),
tf.data.Dataset.from_tensor_slices(dummy_data),
)
run = functools.partial(
mnist_main.run,
datasets_override=datasets,
strategy_override=distribution)
integration.run_synthetic(
main=run,
synth=False,
tmp_root=self.create_tempdir().full_path,
extra_flags=extra_flags)
if __name__ == "__main__":
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for optimizer_factory."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import tensorflow as tf
from official.vision.image_classification import optimizer_factory
from official.vision.image_classification.configs import base_configs
class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
def build_toy_model(self) -> tf.keras.Model:
"""Creates a toy `tf.Keras.Model`."""
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_shape=(1,)))
return model
@parameterized.named_parameters(
('sgd', 'sgd', 0., False), ('momentum', 'momentum', 0., False),
('rmsprop', 'rmsprop', 0., False), ('adam', 'adam', 0., False),
('adamw', 'adamw', 0., False),
('momentum_lookahead', 'momentum', 0., True),
('sgd_ema', 'sgd', 0.999, False),
('momentum_ema', 'momentum', 0.999, False),
('rmsprop_ema', 'rmsprop', 0.999, False))
def test_optimizer(self, optimizer_name, moving_average_decay, lookahead):
"""Smoke test to be sure no syntax errors."""
model = self.build_toy_model()
params = {
'learning_rate': 0.001,
'rho': 0.09,
'momentum': 0.,
'epsilon': 1e-07,
'moving_average_decay': moving_average_decay,
'lookahead': lookahead,
}
optimizer = optimizer_factory.build_optimizer(
optimizer_name=optimizer_name,
base_learning_rate=params['learning_rate'],
params=params,
model=model)
self.assertTrue(issubclass(type(optimizer), tf.keras.optimizers.Optimizer))
def test_unknown_optimizer(self):
with self.assertRaises(ValueError):
optimizer_factory.build_optimizer(
optimizer_name='this_optimizer_does_not_exist',
base_learning_rate=None,
params=None)
def test_learning_rate_without_decay_or_warmups(self):
params = base_configs.LearningRateConfig(
name='exponential',
initial_lr=0.01,
decay_rate=0.01,
decay_epochs=None,
warmup_epochs=None,
scale_by_batch_size=0.01,
examples_per_epoch=1,
boundaries=[0],
multipliers=[0, 1])
batch_size = 1
train_steps = 1
lr = optimizer_factory.build_learning_rate(
params=params, batch_size=batch_size, train_steps=train_steps)
self.assertTrue(
issubclass(
type(lr), tf.keras.optimizers.schedules.LearningRateSchedule))
@parameterized.named_parameters(('exponential', 'exponential'),
('cosine_with_warmup', 'cosine_with_warmup'))
def test_learning_rate_with_decay_and_warmup(self, lr_decay_type):
"""Basic smoke test for syntax."""
params = base_configs.LearningRateConfig(
name=lr_decay_type,
initial_lr=0.01,
decay_rate=0.01,
decay_epochs=1,
warmup_epochs=1,
scale_by_batch_size=0.01,
examples_per_epoch=1,
boundaries=[0],
multipliers=[0, 1])
batch_size = 1
train_epochs = 1
train_steps = 1
lr = optimizer_factory.build_learning_rate(
params=params,
batch_size=batch_size,
train_epochs=train_epochs,
train_steps=train_steps)
self.assertTrue(
issubclass(
type(lr), tf.keras.optimizers.schedules.LearningRateSchedule))
if __name__ == '__main__':
tf.test.main()