Commit 10c0e96b authored by Fan Yang, committed by A. Unique TensorFlower
Browse files

Migrate official/vision/image_classification to official/legacy/image_classification

PiperOrigin-RevId: 410878028
parent e2b671b5
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Configuration definitions for EfficientNet losses, learning rates, and optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dataclasses
from official.legacy.image_classification.configs import base_configs
from official.modeling.hyperparams import base_config
@dataclasses.dataclass
class EfficientNetModelConfig(base_configs.ModelConfig):
  """Configuration for the EfficientNet model.

  This configuration will default to settings used for training efficientnet-b0
  on a v3-8 TPU on ImageNet.

  Attributes:
    name: The name of the model. Defaults to 'EfficientNet'.
    num_classes: The number of classes in the model.
    model_params: A dictionary that represents the parameters of the
      EfficientNet model. These will be passed in to the "from_name" function.
    loss: The configuration for loss. Defaults to a categorical cross entropy
      implementation.
    optimizer: The configuration for optimizations. Defaults to an RMSProp
      configuration.
    learning_rate: The configuration for learning rate. Defaults to an
      exponential configuration.
  """
  name: str = 'EfficientNet'
  num_classes: int = 1000
  model_params: base_config.Config = dataclasses.field(
      default_factory=lambda: {
          'model_name': 'efficientnet-b0',
          'model_weights_path': '',
          'weights_format': 'saved_model',
          'overrides': {
              'batch_norm': 'default',
              'rescale_input': True,
              'num_classes': 1000,
              'activation': 'swish',
              'dtype': 'float32',
          }
      })
  # The nested configs are created through `default_factory` so that every
  # EfficientNetModelConfig gets its own objects. Using a config instance
  # directly as the class-level default shares one object between all
  # instances, and Python 3.11+ refuses unhashable defaults outright
  # ("mutable default ... is not allowed: use default_factory").
  loss: base_configs.LossConfig = dataclasses.field(
      default_factory=lambda: base_configs.LossConfig(
          name='categorical_crossentropy', label_smoothing=0.1))
  optimizer: base_configs.OptimizerConfig = dataclasses.field(
      default_factory=lambda: base_configs.OptimizerConfig(
          name='rmsprop',
          decay=0.9,
          epsilon=0.001,
          momentum=0.9,
          moving_average_decay=None))
  learning_rate: base_configs.LearningRateConfig = dataclasses.field(
      default_factory=lambda: base_configs.LearningRateConfig(
          name='exponential',
          initial_lr=0.008,
          decay_epochs=2.4,
          decay_rate=0.97,
          warmup_epochs=5,
          scale_by_batch_size=1. / 128.,
          staircase=True))
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Contains definitions for EfficientNet model.
[1] Mingxing Tan, Quoc V. Le
EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks.
ICML'19, https://arxiv.org/abs/1905.11946
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dataclasses
import math
from typing import Any, Dict, Optional, Text, Tuple
from absl import logging
import tensorflow as tf
from official.legacy.image_classification import preprocessing
from official.legacy.image_classification.efficientnet import common_modules
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
@dataclasses.dataclass
class BlockConfig(base_config.Config):
  """Config for a single MB Conv Block.

  The default block table in `ModelConfig` builds instances with
  `BlockConfig.from_args(...)`, passing values positionally in the order the
  fields are declared here, so the declaration order must not be changed.

  Attributes:
    input_filters: Number of channels entering the block. `efficientnet`
      rescales it with `round_filters` before the block is built.
    output_filters: Number of channels produced by the projection conv; also
      rescaled with `round_filters`.
    kernel_size: Kernel size of the depthwise (or fused) convolution.
    num_repeat: How many times the block is stacked; rescaled with
      `round_repeats`.
    expand_ratio: Multiplier applied to `input_filters` to obtain the expanded
      filter count. A value of 1 skips the expansion conv.
    strides: Strides of the depthwise (or fused) convolution. Repeated blocks
      after the first one in a stack are built with strides [1, 1].
    se_ratio: Squeeze-and-excitation reduction ratio; the bottleneck has
      `max(1, int(input_filters * se_ratio))` filters. `mb_conv_block` asserts
      it is in (0, 1] whenever `ModelConfig.use_se` is set.
    id_skip: Whether to add the residual connection when all strides are 1 and
      `input_filters == output_filters`.
    fused_conv: If True, a single regular conv replaces the expansion and
      depthwise convs.
    conv_type: 'no_depthwise' drops the depthwise conv and uses a 3x3
      expansion conv; any other value keeps the depthwise conv.
  """
  input_filters: int = 0
  output_filters: int = 0
  kernel_size: int = 3
  num_repeat: int = 1
  expand_ratio: int = 1
  strides: Tuple[int, int] = (1, 1)
  se_ratio: Optional[float] = None
  id_skip: bool = True
  fused_conv: bool = False
  conv_type: str = 'depthwise'
@dataclasses.dataclass
class ModelConfig(base_config.Config):
  """Default Config for Efficientnet-B0.

  `MODEL_CONFIGS` creates the variants with
  `ModelConfig.from_args(width, depth, resolution, dropout)`, i.e. the first
  four fields in declaration order, so that order must be preserved.

  Attributes:
    width_coefficient: Multiplier on filter counts, applied by `round_filters`.
      A falsy value disables filter scaling.
    depth_coefficient: Multiplier on block repeats, applied by `round_repeats`.
    resolution: Not read by the model-building code in this file; presumably
      the nominal input image size for the variant (TODO confirm with callers).
    dropout_rate: Dropout rate applied before the classifier ('top_dropout').
    blocks: The stacks of MBConv blocks, in build order.
    stem_base_filters: Unscaled filter count of the stem convolution.
    top_base_filters: Unscaled filter count of the final 1x1 convolution.
    activation: Activation name resolved through `tf_utils.get_activation`.
    batch_norm: Batch norm variant name resolved through
      `common_modules.get_batch_norm`.
    bn_momentum: Momentum passed to every batch norm layer.
    bn_epsilon: Epsilon passed to every batch norm layer.
    weight_decay: L2 regularization factor for conv and dense layers.
    drop_connect_rate: Maximum drop-connect rate; `efficientnet` scales it
      linearly with block index.
    depth_divisor: Filter counts are rounded to a multiple of this value.
    min_depth: Lower bound on rounded filter counts; falls back to
      `depth_divisor` when unset.
    use_se: Whether MBConv blocks include squeeze-and-excitation.
    input_channels: Number of channels of the input images.
    num_classes: Size of the classification layer.
    model_name: Name given to the Keras model built by `EfficientNet`.
    rescale_input: Whether inputs are passed through
      `preprocessing.normalize_images` first.
    data_format: Not read by the model-building functions in this file, which
      query `tf.keras.backend.image_data_format()` instead.
    dtype: Dtype forwarded to `preprocessing.normalize_images`.
  """
  width_coefficient: float = 1.0
  depth_coefficient: float = 1.0
  resolution: int = 224
  dropout_rate: float = 0.2
  blocks: Tuple[BlockConfig, ...] = (
      # (input_filters, output_filters, kernel_size, num_repeat,
      #  expand_ratio, strides, se_ratio)
      # pylint: disable=bad-whitespace
      BlockConfig.from_args(32, 16, 3, 1, 1, (1, 1), 0.25),
      BlockConfig.from_args(16, 24, 3, 2, 6, (2, 2), 0.25),
      BlockConfig.from_args(24, 40, 5, 2, 6, (2, 2), 0.25),
      BlockConfig.from_args(40, 80, 3, 3, 6, (2, 2), 0.25),
      BlockConfig.from_args(80, 112, 5, 3, 6, (1, 1), 0.25),
      BlockConfig.from_args(112, 192, 5, 4, 6, (2, 2), 0.25),
      BlockConfig.from_args(192, 320, 3, 1, 6, (1, 1), 0.25),
      # pylint: enable=bad-whitespace
  )
  stem_base_filters: int = 32
  top_base_filters: int = 1280
  activation: str = 'simple_swish'
  batch_norm: str = 'default'
  bn_momentum: float = 0.99
  bn_epsilon: float = 1e-3
  # While the original implementation used a weight decay of 1e-5,
  # tf.nn.l2_loss divides it by 2, so we halve this to compensate in Keras
  weight_decay: float = 5e-6
  drop_connect_rate: float = 0.2
  depth_divisor: int = 8
  min_depth: Optional[int] = None
  use_se: bool = True
  input_channels: int = 3
  num_classes: int = 1000
  model_name: str = 'efficientnet'
  rescale_input: bool = True
  data_format: str = 'channels_last'
  dtype: str = 'float32'
# Predefined model variants, keyed by the name accepted by
# `EfficientNet.from_name`. Each entry overrides only the first four
# `ModelConfig` fields; everything else keeps the B0 defaults.
MODEL_CONFIGS = {
    # (width, depth, resolution, dropout)
    'efficientnet-b0': ModelConfig.from_args(1.0, 1.0, 224, 0.2),
    'efficientnet-b1': ModelConfig.from_args(1.0, 1.1, 240, 0.2),
    'efficientnet-b2': ModelConfig.from_args(1.1, 1.2, 260, 0.3),
    'efficientnet-b3': ModelConfig.from_args(1.2, 1.4, 300, 0.3),
    'efficientnet-b4': ModelConfig.from_args(1.4, 1.8, 380, 0.4),
    'efficientnet-b5': ModelConfig.from_args(1.6, 2.2, 456, 0.4),
    'efficientnet-b6': ModelConfig.from_args(1.8, 2.6, 528, 0.5),
    'efficientnet-b7': ModelConfig.from_args(2.0, 3.1, 600, 0.5),
    'efficientnet-b8': ModelConfig.from_args(2.2, 3.6, 672, 0.5),
    'efficientnet-l2': ModelConfig.from_args(4.3, 5.3, 800, 0.5),
}
# Serialized Keras initializer used for every (depthwise) convolution kernel
# created by `conv2d_block`.
CONV_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 2.0,
        'mode': 'fan_out',
        # Note: this is a truncated normal distribution
        'distribution': 'normal'
    }
}
# Serialized Keras initializer used for the final 'logits' dense layer.
DENSE_KERNEL_INITIALIZER = {
    'class_name': 'VarianceScaling',
    'config': {
        'scale': 1 / 3.0,
        'mode': 'fan_out',
        'distribution': 'uniform'
    }
}
def round_filters(filters: int, config: ModelConfig) -> int:
  """Scales a filter count by the width coefficient and rounds it.

  Args:
    filters: the unscaled number of filters.
    config: model config providing `width_coefficient`, `min_depth` and
      `depth_divisor`.

  Returns:
    `filters` unchanged when the width coefficient is falsy; otherwise the
    scaled count rounded to the nearest multiple of `depth_divisor`, never
    below `min_depth` (or the divisor when `min_depth` is unset).
  """
  multiplier = config.width_coefficient
  floor = config.min_depth
  step = config.depth_divisor
  if not multiplier:
    return filters

  scaled = filters * multiplier
  floor = floor or step
  # Round half up to the nearest multiple of `step`.
  nearest_multiple = int(scaled + step / 2) // step * step
  rounded = max(floor, nearest_multiple)
  # Rounding down must not lose more than 10% of the scaled width.
  if rounded < 0.9 * scaled:
    rounded += step
  logging.info('round_filter input=%s output=%s', filters, rounded)
  return int(rounded)
def round_repeats(repeats: int, depth_coefficient: float) -> int:
  """Scales a block repeat count by the depth coefficient, rounding up."""
  scaled_repeats = depth_coefficient * repeats
  return int(math.ceil(scaled_repeats))
def conv2d_block(inputs: tf.Tensor,
                 conv_filters: Optional[int],
                 config: ModelConfig,
                 kernel_size: Any = (1, 1),
                 strides: Any = (1, 1),
                 use_batch_norm: bool = True,
                 use_bias: bool = False,
                 activation: Optional[Any] = None,
                 depthwise: bool = False,
                 name: Optional[Text] = None):
  """Applies a (depthwise) conv2d, then optional batch norm and activation.

  Args:
    inputs: the tensor fed to the convolution.
    conv_filters: number of output filters; ignored when `depthwise` is True.
    config: model config supplying the batch norm variant, its momentum and
      epsilon, and the weight decay.
    kernel_size: convolution kernel size.
    strides: convolution strides.
    use_batch_norm: whether to add a batch norm layer after the convolution.
    use_bias: whether the convolution has a bias term.
    activation: activation applied last, or None for no activation layer.
    depthwise: build a `DepthwiseConv2D` instead of a `Conv2D`.
    name: prefix for the layer names ('_conv2d', '_bn', '_activation').

  Returns:
    The output tensor of the last layer that was added.
  """
  batch_norm_cls = common_modules.get_batch_norm(config.batch_norm)
  channels_first = tf.keras.backend.image_data_format() == 'channels_first'
  l2_strength = config.weight_decay
  layer_prefix = name or ''

  # Arguments shared by both convolution flavours.
  conv_kwargs = dict(
      kernel_size=kernel_size,
      strides=strides,
      use_bias=use_bias,
      padding='same',
      name=layer_prefix + '_conv2d',
      kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
      bias_regularizer=tf.keras.regularizers.l2(l2_strength),
  )
  if depthwise:
    conv_kwargs['depthwise_initializer'] = CONV_KERNEL_INITIALIZER
    conv_layer = tf.keras.layers.DepthwiseConv2D(**conv_kwargs)
  else:
    conv_kwargs['filters'] = conv_filters
    conv_kwargs['kernel_initializer'] = CONV_KERNEL_INITIALIZER
    conv_layer = tf.keras.layers.Conv2D(**conv_kwargs)
  x = conv_layer(inputs)

  if use_batch_norm:
    norm_layer = batch_norm_cls(
        axis=1 if channels_first else -1,
        momentum=config.bn_momentum,
        epsilon=config.bn_epsilon,
        name=layer_prefix + '_bn')
    x = norm_layer(x)

  if activation is not None:
    act_layer = tf.keras.layers.Activation(
        activation, name=layer_prefix + '_activation')
    x = act_layer(x)
  return x
def mb_conv_block(inputs: tf.Tensor,
                  block: BlockConfig,
                  config: ModelConfig,
                  prefix: Optional[Text] = None):
  """Builds one Mobile Inverted Residual Bottleneck block.

  Args:
    inputs: the Keras input to the block
    block: BlockConfig, arguments to create a Block
    config: ModelConfig, a set of model parameters
    prefix: prefix for naming all layers

  Returns:
    the output of the block
  """
  with_se = config.use_se
  act_fn = tf_utils.get_activation(config.activation)
  drop_rate = config.drop_connect_rate
  channels_first = tf.keras.backend.image_data_format() == 'channels_first'
  has_depthwise = block.conv_type != 'no_depthwise'
  prefix = prefix or ''
  expanded_filters = block.input_filters * block.expand_ratio

  x = inputs
  if block.fused_conv:
    # Fused MBConv: one regular conv replaces expansion + depthwise.
    x = conv2d_block(
        x,
        expanded_filters,
        config,
        kernel_size=block.kernel_size,
        strides=block.strides,
        activation=act_fn,
        name=prefix + 'fused')
  else:
    if block.expand_ratio != 1:
      # Expansion phase.
      expand_kernel = (1, 1) if has_depthwise else (3, 3)
      x = conv2d_block(
          x,
          expanded_filters,
          config,
          kernel_size=expand_kernel,
          activation=act_fn,
          name=prefix + 'expand')
    if has_depthwise:
      # Depthwise convolution.
      x = conv2d_block(
          x,
          conv_filters=None,
          config=config,
          kernel_size=block.kernel_size,
          strides=block.strides,
          activation=act_fn,
          depthwise=True,
          name=prefix + 'depthwise')

  if with_se:
    # Squeeze and Excitation phase.
    assert block.se_ratio is not None
    assert 0 < block.se_ratio <= 1
    squeezed_filters = max(1, int(block.input_filters * block.se_ratio))
    se_shape = ((expanded_filters, 1, 1) if channels_first else
                (1, 1, expanded_filters))

    gate = tf.keras.layers.GlobalAveragePooling2D(
        name=prefix + 'se_squeeze')(x)
    gate = tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')(gate)
    gate = conv2d_block(
        gate,
        squeezed_filters,
        config,
        use_bias=True,
        use_batch_norm=False,
        activation=act_fn,
        name=prefix + 'se_reduce')
    gate = conv2d_block(
        gate,
        expanded_filters,
        config,
        use_bias=True,
        use_batch_norm=False,
        activation='sigmoid',
        name=prefix + 'se_expand')
    x = tf.keras.layers.multiply([x, gate], name=prefix + 'se_excite')

  # Output (projection) phase.
  x = conv2d_block(
      x, block.output_filters, config, activation=None, name=prefix + 'project')

  # Identity activation gives quantization-aware training a place to insert
  # its quantization ops.
  identity_layer = tf.keras.layers.Activation(
      tf_utils.get_activation('identity'), name=prefix + 'id')
  x = identity_layer(x)

  has_residual = (
      block.id_skip and all(s == 1 for s in block.strides) and
      block.input_filters == block.output_filters)
  if not has_residual:
    return x

  if drop_rate and drop_rate > 0:
    # Drop connect is expressed as dropout with a per-example noise shape.
    # The only difference between dropout and dropconnect in TF is scaling by
    # drop_connect_rate during training. See:
    # https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
    drop_layer = tf.keras.layers.Dropout(
        drop_rate, noise_shape=(None, 1, 1, 1), name=prefix + 'drop')
    x = drop_layer(x)
  return tf.keras.layers.add([x, inputs], name=prefix + 'add')
def efficientnet(image_input: tf.keras.layers.Input, config: ModelConfig):  # pytype: disable=invalid-annotation  # typed-keras
  """Builds the EfficientNet graph for the given model parameters.

  This function is wrapped by the `EfficientNet` class to make a tf.keras.Model.

  Args:
    image_input: the input batch of images
    config: the model config

  Returns:
    the output of efficientnet
  """
  # Read everything needed up front; `config` is rebound inside the block loop.
  depth_coefficient = config.depth_coefficient
  stacks = config.blocks
  top_base_filters = config.top_base_filters
  act_fn = tf_utils.get_activation(config.activation)
  dropout_rate = config.dropout_rate
  base_drop_connect = config.drop_connect_rate
  num_classes = config.num_classes
  data_format = tf.keras.backend.image_data_format()
  l2_strength = config.weight_decay

  x = image_input
  if data_format == 'channels_first':
    # Happens on GPU/TPU if available.
    x = tf.keras.layers.Permute((3, 1, 2))(x)
  if config.rescale_input:
    x = preprocessing.normalize_images(
        x,
        num_channels=config.input_channels,
        dtype=config.dtype,
        data_format=data_format)

  # Stem.
  x = conv2d_block(
      x,
      round_filters(config.stem_base_filters, config),
      config,
      kernel_size=[3, 3],
      strides=[2, 2],
      activation=act_fn,
      name='stem')

  # MBConv stacks. The drop-connect rate grows linearly with the block index.
  total_blocks = sum(
      round_repeats(stack.num_repeat, depth_coefficient) for stack in stacks)
  built = 0
  for stack_idx, block in enumerate(stacks):
    assert block.num_repeat > 0
    # Scale the filter counts and the repeat count for this model variant.
    block = block.replace(
        input_filters=round_filters(block.input_filters, config),
        output_filters=round_filters(block.output_filters, config),
        num_repeat=round_repeats(block.num_repeat, depth_coefficient))

    # The first block of a stack handles the stride and filter size increase.
    config = config.replace(
        drop_connect_rate=base_drop_connect * float(built) / total_blocks)
    x = mb_conv_block(x, block, config,
                      'stack_{}/block_0/'.format(stack_idx))
    built += 1

    if block.num_repeat > 1:
      block = block.replace(input_filters=block.output_filters, strides=[1, 1])
      for repeat_idx in range(1, block.num_repeat):
        config = config.replace(
            drop_connect_rate=base_drop_connect * float(built) / total_blocks)
        x = mb_conv_block(
            x,
            block,
            config,
            prefix='stack_{}/block_{}/'.format(stack_idx, repeat_idx))
        built += 1

  # Top.
  x = conv2d_block(
      x,
      round_filters(top_base_filters, config),
      config,
      activation=act_fn,
      name='top')

  # Classifier head.
  x = tf.keras.layers.GlobalAveragePooling2D(name='top_pool')(x)
  if dropout_rate and dropout_rate > 0:
    x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
  logits_layer = tf.keras.layers.Dense(
      num_classes,
      kernel_initializer=DENSE_KERNEL_INITIALIZER,
      kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
      bias_regularizer=tf.keras.regularizers.l2(l2_strength),
      name='logits')
  x = logits_layer(x)
  return tf.keras.layers.Activation('softmax', name='probs')(x)
class EfficientNet(tf.keras.Model):
  """Keras model wrapper around the `efficientnet` graph builder.

  Contains helper methods to build, manage, and save metadata about the model.
  """

  def __init__(self,
               config: Optional[ModelConfig] = None,
               overrides: Optional[Dict[Text, Any]] = None):
    """Create an EfficientNet model.

    Args:
      config: (optional) the main model parameters to create the model
      overrides: (optional) a dict containing keys that can override config
    """
    base = config or ModelConfig()
    self.config = base.replace(**(overrides or {}))
    model_name = self.config.model_name

    # Spatial dimensions are left open so any image size is accepted.
    image_input = tf.keras.layers.Input(
        shape=(None, None, self.config.input_channels))
    # The output is always float32, whatever dtype the model computes in.
    output = tf.cast(efficientnet(image_input, self.config), tf.float32)

    logging.info('Building model %s with params %s', model_name, self.config)
    super().__init__(inputs=image_input, outputs=output, name=model_name)

  @classmethod
  def from_name(cls,
                model_name: Text,
                model_weights_path: Optional[Text] = None,
                weights_format: Text = 'saved_model',
                overrides: Optional[Dict[Text, Any]] = None):
    """Construct an EfficientNet model from a predefined model name.

    E.g., `EfficientNet.from_name('efficientnet-b0')`.

    Args:
      model_name: the predefined model name
      model_weights_path: the path to the weights (h5 file or saved model dir)
      weights_format: the model weights format. One of 'saved_model', 'h5', or
        'checkpoint'.
      overrides: (optional) a dict containing keys that can override config

    Returns:
      A constructed EfficientNet instance.

    Raises:
      ValueError: if `model_name` is neither predefined nor supplied through
        the 'model_config' override.
    """
    # Work on a copy so the caller's dict is not modified by the pop below.
    overrides = dict(overrides) if overrides else {}
    # Custom models can be registered through the 'model_config' override.
    known_configs = dict(MODEL_CONFIGS)
    known_configs.update(overrides.pop('model_config', {}))

    if model_name not in known_configs:
      raise ValueError('Unknown model name {}'.format(model_name))

    model = cls(config=known_configs[model_name], overrides=overrides)
    if model_weights_path:
      common_modules.load_weights(
          model, model_weights_path, weights_format=weights_format)
    return model
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A script to export TF-Hub SavedModel."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import os
from absl import app
from absl import flags
import tensorflow as tf
from official.legacy.image_classification.efficientnet import efficientnet_model
# Command-line flags read by `main`. All of them default to None and none is
# marked as required here.
FLAGS = flags.FLAGS
flags.DEFINE_string("model_name", None, "EfficientNet model name.")
flags.DEFINE_string("model_path", None, "File path to TF model checkpoint.")
flags.DEFINE_string("export_path", None,
                    "TF-Hub SavedModel destination path to export.")
def export_tfhub(model_path, hub_destination, model_name):
  """Restores an EfficientNet checkpoint and exports two TF-Hub SavedModels.

  Args:
    model_path: path of the checkpoint to restore.
    hub_destination: directory that receives the "classification" and
      "feature-vector" SavedModels.
    model_name: key into `efficientnet_model.MODEL_CONFIGS`.
  """
  config = efficientnet_model.MODEL_CONFIGS[model_name]

  image_input = tf.keras.layers.Input(
      shape=(None, None, 3), name="image_input", dtype=tf.float32)
  # Inputs are multiplied by 255 before entering the network; presumably the
  # exported model expects images in [0, 1] (TODO confirm).
  scaled_input = image_input * 255.0
  outputs = efficientnet_model.efficientnet(scaled_input, config)
  classifier = tf.keras.Model(image_input, outputs)

  checkpoint = tf.train.Checkpoint(model=classifier)
  checkpoint.restore(model_path).assert_existing_objects_matched()

  classification_dir = os.path.join(hub_destination, "classification")
  classifier.save(classification_dir, include_optimizer=False)

  # The feature-vector model stops at the global pooling layer.
  pooled_features = classifier.get_layer(name="top_pool").get_output_at(0)
  feature_extractor = tf.keras.Model(image_input, pooled_features)
  feature_vector_dir = os.path.join(hub_destination, "feature-vector")
  feature_extractor.save(feature_vector_dir, include_optimizer=False)
def main(argv):
  """Script entry point: exports the model described by the flags."""
  extra_args = argv[1:]
  if extra_args:
    raise app.UsageError("Too many command-line arguments.")
  export_tfhub(FLAGS.model_path, FLAGS.export_path, FLAGS.model_name)


if __name__ == "__main__":
  app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Learning rate utilities for vision tasks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from typing import Any, Mapping, Optional
import numpy as np
import tensorflow as tf
# Reference learning rate; `CosineDecayWithWarmup` scales it linearly by
# `batch_size / 256` to obtain its peak learning rate.
BASE_LEARNING_RATE = 0.1
class WarmupDecaySchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Wraps a LearningRateSchedule with a linear warmup phase."""

  def __init__(self,
               lr_schedule: tf.keras.optimizers.schedules.LearningRateSchedule,
               warmup_steps: int,
               warmup_lr: Optional[float] = None):
    """Add warmup decay to a learning rate schedule.

    Args:
      lr_schedule: base learning rate scheduler
      warmup_steps: number of warmup steps
      warmup_lr: an optional field for the final warmup learning rate. This
        should be provided if the base `lr_schedule` does not contain this
        field.
    """
    super().__init__()
    self._lr_schedule = lr_schedule
    self._warmup_steps = warmup_steps
    self._warmup_lr = warmup_lr

  def __call__(self, step: int):
    """Returns the learning rate at `step`.

    While `step` is below `warmup_steps` the rate ramps linearly from zero
    towards the warmup target; afterwards the wrapped schedule's value is
    used. A falsy `warmup_steps` disables the warmup entirely.
    """
    decayed_lr = self._lr_schedule(step)
    if not self._warmup_steps:
      return decayed_lr

    # Fall back to the wrapped schedule's own initial rate as the target.
    target = (
        self._warmup_lr if self._warmup_lr is not None else
        self._lr_schedule.initial_learning_rate)
    initial_learning_rate = tf.convert_to_tensor(
        target, name="initial_learning_rate")
    dtype = initial_learning_rate.dtype
    step_f = tf.cast(step, dtype)
    warmup_f = tf.cast(self._warmup_steps, dtype)
    ramp_lr = initial_learning_rate * step_f / warmup_f
    return tf.cond(step_f < warmup_f, lambda: ramp_lr, lambda: decayed_lr)

  def get_config(self) -> Mapping[str, Any]:
    """Returns the wrapped schedule's config extended with the warmup fields."""
    config = self._lr_schedule.get_config()
    config.update(
        warmup_steps=self._warmup_steps,
        warmup_lr=self._warmup_lr,
    )
    return config
class CosineDecayWithWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Cosine learning rate decay preceded by a linear warmup."""

  def __init__(self, batch_size: int, total_steps: int, warmup_steps: int):
    """Creates the cosine learning rate tensor with linear warmup.

    The peak learning rate is `BASE_LEARNING_RATE * batch_size / 256`.

    Args:
      batch_size: The training batch size used in the experiment.
      total_steps: Total training steps.
      warmup_steps: Steps for the warm up period.
    """
    super(CosineDecayWithWarmup, self).__init__()
    base_lr_batch_size = 256
    # Kept so that `get_config` can report the constructor arguments.
    self._batch_size = batch_size
    self._total_steps = total_steps
    self._init_learning_rate = BASE_LEARNING_RATE * batch_size / base_lr_batch_size
    self._warmup_steps = warmup_steps

  def __call__(self, global_step: int):
    """Returns the learning rate at `global_step`.

    Below `warmup_steps` the rate grows linearly from 0 to the peak rate; from
    there it follows half a cosine period that reaches 0 at `total_steps`.
    """
    global_step = tf.cast(global_step, dtype=tf.float32)
    warmup_steps = self._warmup_steps
    init_lr = self._init_learning_rate
    total_steps = self._total_steps
    linear_warmup = global_step / warmup_steps * init_lr
    cosine_learning_rate = init_lr * (tf.cos(np.pi *
                                             (global_step - warmup_steps) /
                                             (total_steps - warmup_steps)) +
                                      1.0) / 2.0
    # Both branches are computed; `tf.where` picks one per step value.
    learning_rate = tf.where(global_step < warmup_steps, linear_warmup,
                             cosine_learning_rate)
    return learning_rate

  def get_config(self):
    """Returns the constructor arguments of this schedule.

    The previous implementation read `self._warmup_learning_rate`, which is
    never assigned, so every call raised AttributeError. The keys now mirror
    `__init__`, which lets the default `from_config` rebuild the schedule.
    """
    return {
        "batch_size": self._batch_size,
        "total_steps": self._total_steps,
        "warmup_steps": self._warmup_steps,
    }
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for learning_rate."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.legacy.image_classification import learning_rate
class LearningRateTests(tf.test.TestCase):
  """Computational checks for the schedules in `learning_rate`."""

  def test_warmup_decay(self):
    """Checks the linear ramp produced during the warmup phase."""
    initial_lr = 0.01
    warmup_steps = 10
    wrapped = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=initial_lr, decay_steps=100, decay_rate=0.01)
    schedule = learning_rate.WarmupDecaySchedule(
        lr_schedule=wrapped, warmup_steps=warmup_steps)

    for step in range(warmup_steps - 1):
      self.assertEqual(schedule.get_config()['warmup_steps'], warmup_steps)
      expected = step / warmup_steps * initial_lr
      self.assertAllClose(self.evaluate(schedule(step)), expected)

  def test_cosine_decay_with_warmup(self):
    """Checks every step of a three-step cosine schedule with warmup."""
    expected_lrs = [0.0, 0.1, 0.05, 0.0]
    schedule = learning_rate.CosineDecayWithWarmup(
        batch_size=256, total_steps=3, warmup_steps=1)

    for step, expected in enumerate(expected_lrs):
      self.assertAllClose(schedule(step), expected)


if __name__ == '__main__':
  tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runs a simple model on the MNIST dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
# Import libraries
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
import tensorflow_datasets as tfds
from official.common import distribute_utils
from official.legacy.image_classification.resnet import common
from official.utils.flags import core as flags_core
from official.utils.misc import model_helpers
# Global absl flag registry; populated by `define_mnist_flags` and read by
# `main` below.
FLAGS = flags.FLAGS
def build_model():
  """Builds the Keras CNN that classifies 28x28x1 digit images.

  Returns:
    A `tf.keras.Model` named 'mnist' with a 10-way softmax output.
  """
  layers = tf.keras.layers
  image = layers.Input(shape=(28, 28, 1))

  # Two identical conv + max-pool stages.
  features = image
  for _ in range(2):
    features = layers.Conv2D(
        filters=32, kernel_size=5, padding='same',
        activation='relu')(features)
    features = layers.MaxPooling2D(
        pool_size=(2, 2), strides=(2, 2), padding='same')(features)

  # Dense classifier head.
  features = layers.Flatten()(features)
  features = layers.Dense(1024, activation='relu')(features)
  features = layers.Dropout(0.4)(features)
  probs = layers.Dense(10, activation='softmax')(features)

  return tf.keras.models.Model(image, probs, name='mnist')
@tfds.decode.make_decoder(output_dtype=tf.float32)
def decode_image(example, feature):
  """Decodes an image as float32 and divides it by 255."""
  decoded = feature.decode_example(example)
  return tf.cast(decoded, dtype=tf.float32) / 255
def run(flags_obj, datasets_override=None, strategy_override=None):
  """Trains and evaluates the MNIST model with native Keras APIs.

  Args:
    flags_obj: An object containing parsed flag values.
    datasets_override: A pair of `tf.data.Dataset` objects to train the model,
      representing the train and test sets.
    strategy_override: A `tf.distribute.Strategy` object to use for model.

  Returns:
    Dictionary of training and eval stats.
  """
  # Start TF profiler server.
  tf.profiler.experimental.server.start(flags_obj.profiler_port)

  strategy = strategy_override or distribute_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus,
      tpu_address=flags_obj.tpu)
  strategy_scope = distribute_utils.get_strategy_scope(strategy)

  # The builder is always created: split sizes are read from it further down
  # even when the datasets themselves are overridden.
  mnist = tfds.builder('mnist', data_dir=flags_obj.data_dir)
  if flags_obj.download:
    mnist.download_and_prepare()

  mnist_train, mnist_test = datasets_override or mnist.as_dataset(
      split=['train', 'test'],
      decoders={'image': decode_image()},  # pylint: disable=no-value-for-parameter
      as_supervised=True)

  batch_size = flags_obj.batch_size
  train_input_dataset = (
      mnist_train.cache().repeat().shuffle(buffer_size=50000).batch(batch_size))
  eval_input_dataset = mnist_test.cache().repeat().batch(batch_size)

  with strategy_scope:
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        0.05, decay_steps=100000, decay_rate=0.96)
    model = build_model()
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule),
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'])

  # Both datasets repeat forever, so step counts bound each epoch/eval pass.
  splits = mnist.info.splits
  train_steps = splits['train'].num_examples // batch_size
  num_eval_steps = splits['test'].num_examples // batch_size

  callbacks = [
      tf.keras.callbacks.ModelCheckpoint(
          os.path.join(flags_obj.model_dir, 'model.ckpt-{epoch:04d}'),
          save_weights_only=True),
      tf.keras.callbacks.TensorBoard(log_dir=flags_obj.model_dir),
  ]

  history = model.fit(
      train_input_dataset,
      epochs=flags_obj.train_epochs,
      steps_per_epoch=train_steps,
      callbacks=callbacks,
      validation_steps=num_eval_steps,
      validation_data=eval_input_dataset,
      validation_freq=flags_obj.epochs_between_evals)

  model.save(
      os.path.join(flags_obj.model_dir, 'saved_model'),
      include_optimizer=False)

  eval_output = model.evaluate(
      eval_input_dataset, steps=num_eval_steps, verbose=2)

  return common.build_stats(history, eval_output, callbacks)
def define_mnist_flags():
  """Registers the command line flags used by the MNIST model."""
  # Shared flag groups first; the batch size default set at the end overrides
  # a flag that must already exist by then.
  flags_core.define_base(
      clean=True,
      num_gpu=True,
      train_epochs=True,
      epochs_between_evals=True,
      distribution_strategy=True)
  flags_core.define_device()
  flags_core.define_distribution()

  # Flags specific to this script.
  flags.DEFINE_bool(
      name='download',
      default=True,
      help='Whether to download data to `--data_dir`.')
  flags.DEFINE_integer(
      name='profiler_port',
      default=9012,
      help='Port to start profiler server on.')

  FLAGS.set_default('batch_size', 1024)
def main(_):
  """Script entry point: applies the clean flag, runs training, logs stats."""
  model_helpers.apply_clean(FLAGS)
  run_stats = run(flags.FLAGS)
  logging.info('Run stats:\n%s', run_stats)


if __name__ == '__main__':
  logging.set_verbosity(logging.INFO)
  define_mnist_flags()
  app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test the Keras MNIST model on GPU."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
from absl.testing import parameterized
import tensorflow as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.legacy.image_classification import mnist_main
from official.utils.testing import integration
# Register the MNIST flags once at import time so the test below can pass
# flag values such as `-train_epochs` to the run.
mnist_main.define_mnist_flags()
def eager_strategy_combinations():
  """Returns the distribution strategy combinations the test runs under."""
  strategies = [
      strategy_combinations.default_strategy,
      strategy_combinations.cloud_tpu_strategy,
      strategy_combinations.one_device_strategy_gpu,
  ]
  return combinations.combine(distribution=strategies)
class KerasMnistTest(tf.test.TestCase, parameterized.TestCase):
  """End-to-end smoke tests for the sample Keras MNIST model."""

  _tempdir = None

  @classmethod
  def setUpClass(cls):  # pylint: disable=invalid-name
    super().setUpClass()

  def tearDown(self):
    """Runs the base tear-down, then removes the test's temp directory."""
    super().tearDown()
    tf.io.gfile.rmtree(self.get_temp_dir())

  @combinations.generate(eager_strategy_combinations())
  def test_end_to_end(self, distribution):
    """Runs `mnist_main.run` for one epoch on dummy data under `distribution`.

    Args:
      distribution: The strategy supplied by `eager_strategy_combinations`,
        forwarded to `mnist_main.run` as `strategy_override`.
    """
    # Ten all-ones 28x28x1 images paired with the labels 0..9.
    dummy_data = (
        tf.ones(shape=(10, 28, 28, 1), dtype=tf.int32),
        tf.range(10),
    )
    # Two independent datasets built from the same dummy tensors.
    datasets = tuple(
        tf.data.Dataset.from_tensor_slices(dummy_data) for _ in range(2))

    run_with_overrides = functools.partial(
        mnist_main.run,
        datasets_override=datasets,
        strategy_override=distribution)

    integration.run_synthetic(
        main=run_with_overrides,
        synth=False,
        tmp_root=self.create_tempdir().full_path,
        extra_flags=[
            "-train_epochs",
            "1",
            # Let TFDS find the metadata folder automatically
            "--data_dir=",
        ])
if __name__ == "__main__":
  # Run every test case in this module through the TensorFlow test runner.
  tf.test.main()
This diff is collapsed.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for optimizer_factory."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
from absl.testing import parameterized
import tensorflow as tf
from official.legacy.image_classification import optimizer_factory
from official.legacy.image_classification.configs import base_configs
class OptimizerFactoryTest(tf.test.TestCase, parameterized.TestCase):
  """Smoke tests for the builders exposed by `optimizer_factory`."""

  def build_toy_model(self) -> tf.keras.Model:
    """Creates a toy `tf.keras.Model` with a single one-unit dense layer."""
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(1, input_shape=(1,)))
    return model

  # Each case is (test name, optimizer_name, moving_average_decay, lookahead).
  @parameterized.named_parameters(
      ('sgd', 'sgd', 0., False),
      ('momentum', 'momentum', 0., False),
      ('rmsprop', 'rmsprop', 0., False),
      ('adam', 'adam', 0., False),
      ('adamw', 'adamw', 0., False),
      ('momentum_lookahead', 'momentum', 0., True),
      ('sgd_ema', 'sgd', 0.999, False),
      ('momentum_ema', 'momentum', 0.999, False),
      ('rmsprop_ema', 'rmsprop', 0.999, False))
  def test_optimizer(self, optimizer_name, moving_average_decay, lookahead):
    """Smoke test: `build_optimizer` returns a Keras optimizer.

    Args:
      optimizer_name: Name passed to `optimizer_factory.build_optimizer`.
      moving_average_decay: Value stored under `params['moving_average_decay']`.
      lookahead: Value stored under `params['lookahead']`.
    """
    model = self.build_toy_model()
    params = {
        'learning_rate': 0.001,
        'rho': 0.09,
        'momentum': 0.,
        'epsilon': 1e-07,
        'moving_average_decay': moving_average_decay,
        'lookahead': lookahead,
    }
    optimizer = optimizer_factory.build_optimizer(
        optimizer_name=optimizer_name,
        base_learning_rate=params['learning_rate'],
        params=params,
        model=model)
    # assertIsInstance reports the offending object on failure, unlike
    # assertTrue(issubclass(type(...), ...)).
    self.assertIsInstance(optimizer, tf.keras.optimizers.Optimizer)

  def test_unknown_optimizer(self):
    """An unrecognized optimizer name must raise `ValueError`."""
    with self.assertRaises(ValueError):
      optimizer_factory.build_optimizer(
          optimizer_name='this_optimizer_does_not_exist',
          base_learning_rate=None,
          params=None)

  def test_learning_rate_without_decay_or_warmups(self):
    """Smoke test: a schedule is built when decay/warmup epochs are `None`."""
    params = base_configs.LearningRateConfig(
        name='exponential',
        initial_lr=0.01,
        decay_rate=0.01,
        decay_epochs=None,
        warmup_epochs=None,
        scale_by_batch_size=0.01,
        examples_per_epoch=1,
        boundaries=[0],
        multipliers=[0, 1])
    batch_size = 1
    train_steps = 1

    lr = optimizer_factory.build_learning_rate(
        params=params, batch_size=batch_size, train_steps=train_steps)
    self.assertIsInstance(
        lr, tf.keras.optimizers.schedules.LearningRateSchedule)

  @parameterized.named_parameters(('exponential', 'exponential'),
                                  ('cosine_with_warmup', 'cosine_with_warmup'))
  def test_learning_rate_with_decay_and_warmup(self, lr_decay_type):
    """Smoke test: a schedule is built with decay and warmup both set.

    Args:
      lr_decay_type: Schedule name placed in `LearningRateConfig.name`.
    """
    params = base_configs.LearningRateConfig(
        name=lr_decay_type,
        initial_lr=0.01,
        decay_rate=0.01,
        decay_epochs=1,
        warmup_epochs=1,
        scale_by_batch_size=0.01,
        examples_per_epoch=1,
        boundaries=[0],
        multipliers=[0, 1])
    batch_size = 1
    train_epochs = 1
    train_steps = 1

    lr = optimizer_factory.build_learning_rate(
        params=params,
        batch_size=batch_size,
        train_epochs=train_epochs,
        train_steps=train_steps)
    self.assertIsInstance(
        lr, tf.keras.optimizers.schedules.LearningRateSchedule)
if __name__ == '__main__':
  # Run every test case in this module through the TensorFlow test runner.
  tf.test.main()
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment