Commit c57e975a authored by saberkun's avatar saberkun

Merge pull request #10338 from srihari-humbarwadi:readme

PiperOrigin-RevId: 413033276
parents 7fb4f3cd acf4156e
@@ -12,7 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Keras-CV package definition."""
# pylint: disable=wildcard-import
from official.vision.keras_cv import losses
from official.vision.keras_cv import ops
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common util functions and classes used by both keras cifar and imagenet."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import flags
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
FLAGS = flags.FLAGS
BASE_LEARNING_RATE = 0.1 # This matches Jing's version.
TRAIN_TOP_1 = 'training_accuracy_top_1'
LR_SCHEDULE = [ # (multiplier, epoch to start) tuples
(1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)
]
class PiecewiseConstantDecayWithWarmup(
tf.keras.optimizers.schedules.LearningRateSchedule):
"""Piecewise constant decay with warmup schedule."""
def __init__(self,
batch_size,
epoch_size,
warmup_epochs,
boundaries,
multipliers,
compute_lr_on_cpu=True,
name=None):
super(PiecewiseConstantDecayWithWarmup, self).__init__()
if len(boundaries) != len(multipliers) - 1:
raise ValueError('The length of boundaries must be 1 less than the '
'length of multipliers')
base_lr_batch_size = 256
steps_per_epoch = epoch_size // batch_size
self.rescaled_lr = BASE_LEARNING_RATE * batch_size / base_lr_batch_size
self.step_boundaries = [float(steps_per_epoch) * x for x in boundaries]
self.lr_values = [self.rescaled_lr * m for m in multipliers]
self.warmup_steps = warmup_epochs * steps_per_epoch
self.compute_lr_on_cpu = compute_lr_on_cpu
self.name = name
self.learning_rate_ops_cache = {}
def __call__(self, step):
if tf.executing_eagerly():
return self._get_learning_rate(step)
    # In an eager function or graph, the current implementation of the
    # optimizer repeatedly calls, and thus creates ops for, the learning rate
    # schedule. To avoid this, we cache the ops when not executing eagerly.
graph = tf.compat.v1.get_default_graph()
if graph not in self.learning_rate_ops_cache:
if self.compute_lr_on_cpu:
with tf.device('/device:CPU:0'):
self.learning_rate_ops_cache[graph] = self._get_learning_rate(step)
else:
self.learning_rate_ops_cache[graph] = self._get_learning_rate(step)
return self.learning_rate_ops_cache[graph]
def _get_learning_rate(self, step):
"""Compute learning rate at given step."""
with tf.name_scope('PiecewiseConstantDecayWithWarmup'):
def warmup_lr(step):
return self.rescaled_lr * (
tf.cast(step, tf.float32) / tf.cast(self.warmup_steps, tf.float32))
def piecewise_lr(step):
return tf.compat.v1.train.piecewise_constant(step, self.step_boundaries,
self.lr_values)
return tf.cond(step < self.warmup_steps, lambda: warmup_lr(step),
lambda: piecewise_lr(step))
def get_config(self):
return {
'rescaled_lr': self.rescaled_lr,
'step_boundaries': self.step_boundaries,
'lr_values': self.lr_values,
'warmup_steps': self.warmup_steps,
'compute_lr_on_cpu': self.compute_lr_on_cpu,
'name': self.name
}
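# Editor's illustrative sketch (an addition, not part of the original file):
# how the LR_SCHEDULE tuples above feed this schedule, mirroring the wiring in
# resnet_runnable.py. The first tuple supplies the warmup epochs; the
# remaining tuples supply the boundaries; all tuples supply the multipliers.
#
#   schedule = PiecewiseConstantDecayWithWarmup(
#       batch_size=256,
#       epoch_size=1281167,  # ImageNet training-set size.
#       warmup_epochs=LR_SCHEDULE[0][1],
#       boundaries=[p[1] for p in LR_SCHEDULE[1:]],
#       multipliers=[p[0] for p in LR_SCHEDULE])
#   schedule(tf.constant(0))  # 0.0: warmup ramps linearly up to 0.1.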
def get_optimizer(learning_rate=0.1):
"""Returns optimizer to use."""
  # The learning_rate is overwritten at the beginning of each step by a
  # callback.
return tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
def get_callbacks(pruning_method=None,
enable_checkpoint_and_export=False,
model_dir=None):
"""Returns common callbacks."""
time_callback = keras_utils.TimeHistory(
FLAGS.batch_size,
FLAGS.log_steps,
logdir=FLAGS.model_dir if FLAGS.enable_tensorboard else None)
callbacks = [time_callback]
if FLAGS.enable_tensorboard:
tensorboard_callback = tf.keras.callbacks.TensorBoard(
log_dir=FLAGS.model_dir, profile_batch=FLAGS.profile_steps)
callbacks.append(tensorboard_callback)
is_pruning_enabled = pruning_method is not None
if is_pruning_enabled:
callbacks.append(tfmot.sparsity.keras.UpdatePruningStep())
if model_dir is not None:
callbacks.append(
tfmot.sparsity.keras.PruningSummaries(
log_dir=model_dir, profile_batch=0))
if enable_checkpoint_and_export:
if model_dir is not None:
ckpt_full_path = os.path.join(model_dir, 'model.ckpt-{epoch:04d}')
callbacks.append(
tf.keras.callbacks.ModelCheckpoint(
ckpt_full_path, save_weights_only=True))
return callbacks
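# Editor's usage sketch (hypothetical model_dir; assumes the common flags have
# already been parsed, since get_callbacks reads FLAGS.batch_size etc.):
#   callbacks = get_callbacks(
#       pruning_method=None,
#       enable_checkpoint_and_export=True,
#       model_dir='/tmp/resnet')
#   model.fit(train_ds, epochs=90, callbacks=callbacks)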
def build_stats(history, eval_output, callbacks):
"""Normalizes and returns dictionary of stats.
Args:
history: Results of the training step. Supports both categorical_accuracy
and sparse_categorical_accuracy.
eval_output: Output of the eval step. Assumes first value is eval_loss and
second value is accuracy_top_1.
callbacks: a list of callbacks which might include a time history callback
used during keras.fit.
Returns:
Dictionary of normalized results.
"""
stats = {}
if eval_output:
stats['accuracy_top_1'] = float(eval_output[1])
stats['eval_loss'] = float(eval_output[0])
if history and history.history:
train_hist = history.history
# Gets final loss from training.
stats['loss'] = float(train_hist['loss'][-1])
# Gets top_1 training accuracy.
if 'categorical_accuracy' in train_hist:
stats[TRAIN_TOP_1] = float(train_hist['categorical_accuracy'][-1])
elif 'sparse_categorical_accuracy' in train_hist:
stats[TRAIN_TOP_1] = float(train_hist['sparse_categorical_accuracy'][-1])
elif 'accuracy' in train_hist:
stats[TRAIN_TOP_1] = float(train_hist['accuracy'][-1])
if not callbacks:
return stats
# Look for the time history callback which was used during keras.fit
for callback in callbacks:
if isinstance(callback, keras_utils.TimeHistory):
timestamp_log = callback.timestamp_log
stats['step_timestamp_log'] = timestamp_log
stats['train_finish_time'] = callback.train_finish_time
if callback.epoch_runtime_log:
stats['avg_exp_per_second'] = callback.average_examples_per_second
return stats
def define_keras_flags(model=False,
optimizer=False,
pretrained_filepath=False):
"""Define flags for Keras models."""
flags_core.define_base(
clean=True,
num_gpu=True,
run_eagerly=True,
train_epochs=True,
epochs_between_evals=True,
distribution_strategy=True)
flags_core.define_performance(
num_parallel_calls=False,
synthetic_data=True,
dtype=True,
all_reduce_alg=True,
num_packs=True,
tf_gpu_thread_mode=True,
datasets_num_private_threads=True,
loss_scale=True,
fp16_implementation=True,
tf_data_experimental_slack=True,
enable_xla=True,
training_dataset_cache=True)
flags_core.define_image()
flags_core.define_benchmark()
flags_core.define_distribution()
flags.adopt_module_key_flags(flags_core)
flags.DEFINE_boolean(name='enable_eager', default=False, help='Enable eager?')
flags.DEFINE_boolean(name='skip_eval', default=False, help='Skip evaluation?')
# TODO(b/135607288): Remove this flag once we understand the root cause of
# slowdown when setting the learning phase in Keras backend.
flags.DEFINE_boolean(
name='set_learning_phase_to_train',
default=True,
help='If skip eval, also set Keras learning phase to 1 (training).')
flags.DEFINE_boolean(
name='explicit_gpu_placement',
default=False,
help='If not using distribution strategy, explicitly set device scope '
'for the Keras training loop.')
flags.DEFINE_boolean(
name='use_trivial_model',
default=False,
help='Whether to use a trivial Keras model.')
flags.DEFINE_boolean(
name='report_accuracy_metrics',
default=True,
help='Report metrics during training and evaluation.')
flags.DEFINE_boolean(
name='use_tensor_lr',
default=True,
help='Use learning rate tensor instead of a callback.')
flags.DEFINE_boolean(
name='enable_tensorboard',
default=False,
help='Whether to enable TensorBoard callback.')
flags.DEFINE_string(
name='profile_steps',
default=None,
help='Save profiling data to model dir at given range of global steps. The '
'value must be a comma separated pair of positive integers, specifying '
'the first and last step to profile. For example, "--profile_steps=2,4" '
'triggers the profiler to process 3 steps, starting from the 2nd step. '
'Note that profiler has a non-trivial performance overhead, and the '
'output file can be gigantic if profiling many steps.')
flags.DEFINE_integer(
name='train_steps',
default=None,
help='The number of steps to run for training. If it is larger than '
'# batches per epoch, then use # batches per epoch. This flag will be '
'ignored if train_epochs is set to be larger than 1. ')
flags.DEFINE_boolean(
name='batchnorm_spatial_persistent',
default=True,
    help='Enable the spatial persistent mode for the cuDNN batch norm kernel.')
flags.DEFINE_boolean(
name='enable_get_next_as_optional',
default=False,
help='Enable get_next_as_optional behavior in DistributedIterator.')
flags.DEFINE_boolean(
name='enable_checkpoint_and_export',
default=False,
help='Whether to enable a checkpoint callback and export the savedmodel.')
flags.DEFINE_string(name='tpu', default='', help='TPU address to connect to.')
flags.DEFINE_integer(
name='steps_per_loop',
default=None,
    help='Number of steps per training loop. Only the training step happens '
    'inside the loop; callbacks will not be called inside it. Will be capped '
    'at steps per epoch.')
flags.DEFINE_boolean(
name='use_tf_while_loop',
default=True,
help='Whether to build a tf.while_loop inside the training loop on the '
'host. Setting it to True is critical to have peak performance on '
'TPU.')
if model:
flags.DEFINE_string('model', 'resnet50_v1.5',
'Name of model preset. (mobilenet, resnet50_v1.5)')
if optimizer:
flags.DEFINE_string(
'optimizer', 'resnet50_default', 'Name of optimizer preset. '
'(mobilenet_default, resnet50_default)')
# TODO(kimjaehong): Replace as general hyper-params not only for mobilenet.
flags.DEFINE_float(
'initial_learning_rate_per_sample', 0.00007,
'Initial value of learning rate per sample for '
'mobilenet_default.')
flags.DEFINE_float('lr_decay_factor', 0.94,
'Learning rate decay factor for mobilenet_default.')
flags.DEFINE_float('num_epochs_per_decay', 2.5,
'Number of epochs per decay for mobilenet_default.')
if pretrained_filepath:
flags.DEFINE_string('pretrained_filepath', '', 'Pretrained file path.')
def get_synth_data(height, width, num_channels, num_classes, dtype):
"""Creates a set of synthetic random data.
Args:
height: Integer height that will be used to create a fake image tensor.
width: Integer width that will be used to create a fake image tensor.
num_channels: Integer depth that will be used to create a fake image tensor.
    num_classes: Number of classes that should be represented in the fake
      labels tensor.
dtype: Data type for features/images.
Returns:
A tuple of tensors representing the inputs and labels.
"""
# Synthetic input should be within [0, 255].
inputs = tf.random.truncated_normal([height, width, num_channels],
dtype=dtype,
mean=127,
stddev=60,
name='synthetic_inputs')
  # Note: tf.random.uniform's maxval is exclusive, so use num_classes to
  # cover all class ids in [0, num_classes).
  labels = tf.random.uniform([1],
                             minval=0,
                             maxval=num_classes,
                             dtype=tf.int32,
                             name='synthetic_labels')
return inputs, labels
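# Editor's note: a minimal usage sketch. get_synth_data returns a single
# unbatched (image, label) pair; get_synth_input_fn below repeats and batches
# it into a throughput-benchmarking dataset.
#   image, label = get_synth_data(
#       height=224, width=224, num_channels=3, num_classes=1000,
#       dtype=tf.float32)
#   # image.shape == [224, 224, 3]; label.shape == [1].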
def define_pruning_flags():
"""Define flags for pruning methods."""
  flags.DEFINE_string(
      'pruning_method', None, 'Pruning method. '
      'None (no pruning) or polynomial_decay.')
flags.DEFINE_float('pruning_initial_sparsity', 0.0,
'Initial sparsity for pruning.')
flags.DEFINE_float('pruning_final_sparsity', 0.5,
'Final sparsity for pruning.')
flags.DEFINE_integer('pruning_begin_step', 0, 'Begin step for pruning.')
flags.DEFINE_integer('pruning_end_step', 100000, 'End step for pruning.')
flags.DEFINE_integer('pruning_frequency', 100, 'Frequency for pruning.')
def define_clustering_flags():
"""Define flags for clustering methods."""
flags.DEFINE_string('clustering_method', None,
'None (no clustering) or selective_clustering '
'(cluster last three Conv2D layers of the model).')
def get_synth_input_fn(height,
width,
num_channels,
num_classes,
dtype=tf.float32,
drop_remainder=True):
"""Returns an input function that returns a dataset with random data.
  This input_fn returns a dataset that iterates over a set of random data and
  bypasses all preprocessing, e.g. jpeg decode and copy. The host-to-device
  copy is still included. This is used to find the upper throughput bound when
  tuning the full input pipeline.
Args:
height: Integer height that will be used to create a fake image tensor.
width: Integer width that will be used to create a fake image tensor.
num_channels: Integer depth that will be used to create a fake image tensor.
    num_classes: Number of classes that should be represented in the fake
      labels tensor.
dtype: Data type for features/images.
drop_remainder: A boolean indicates whether to drop the remainder of the
batches. If True, the batch dimension will be static.
Returns:
An input_fn that can be used in place of a real one to return a dataset
that can be used for iteration.
"""
# pylint: disable=unused-argument
def input_fn(is_training, data_dir, batch_size, *args, **kwargs):
"""Returns dataset filled with random data."""
inputs, labels = get_synth_data(
height=height,
width=width,
num_channels=num_channels,
num_classes=num_classes,
dtype=dtype)
# Cast to float32 for Keras model.
labels = tf.cast(labels, dtype=tf.float32)
data = tf.data.Dataset.from_tensors((inputs, labels)).repeat()
# `drop_remainder` will make dataset produce outputs with known shapes.
data = data.batch(batch_size, drop_remainder=drop_remainder)
data = data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
return data
return input_fn
def set_cudnn_batchnorm_mode():
"""Set CuDNN batchnorm mode for better performance.
Note: Spatial Persistent mode may lead to accuracy losses for certain
models.
"""
if FLAGS.batchnorm_spatial_persistent:
os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
else:
os.environ.pop('TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT', None)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides utilities to preprocess images.
Training images are sampled using the provided bounding boxes, and subsequently
cropped to the sampled bounding box. Images are additionally flipped randomly,
then resized to the target output size (without aspect-ratio preservation).
Images used during evaluation are resized (with aspect-ratio preservation) and
centrally cropped.
All images undergo mean color subtraction.
Note that these steps are colloquially referred to as "ResNet preprocessing,"
and they differ from "VGG preprocessing," which does not use bounding boxes
and instead does an aspect-preserving resize followed by random crop during
training. (These both differ from "Inception preprocessing," which introduces
color distortion steps.)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import logging
import tensorflow as tf
DEFAULT_IMAGE_SIZE = 224
NUM_CHANNELS = 3
NUM_CLASSES = 1001
NUM_IMAGES = {
'train': 1281167,
'validation': 50000,
}
_NUM_TRAIN_FILES = 1024
_SHUFFLE_BUFFER = 10000
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN]
# The lower bound for the smallest side of the image for aspect-preserving
# resizing. For example, if an image is 500 x 1000, it will be resized to
# _RESIZE_MIN x (_RESIZE_MIN * 2).
_RESIZE_MIN = 256
def process_record_dataset(dataset,
is_training,
batch_size,
shuffle_buffer,
parse_record_fn,
dtype=tf.float32,
datasets_num_private_threads=None,
drop_remainder=False,
tf_data_experimental_slack=False):
"""Given a Dataset with raw records, return an iterator over the records.
Args:
dataset: A Dataset representing raw records
is_training: A boolean denoting whether the input is for training.
batch_size: The number of samples per batch.
shuffle_buffer: The buffer size to use when shuffling records. A larger
value results in better randomness, but smaller values reduce startup time
and use less memory.
parse_record_fn: A function that takes a raw record and returns the
corresponding (image, label) pair.
dtype: Data type to use for images/features.
datasets_num_private_threads: Number of threads for a private threadpool
created for all datasets computation.
drop_remainder: A boolean indicates whether to drop the remainder of the
batches. If True, the batch dimension will be static.
tf_data_experimental_slack: Whether to enable tf.data's `experimental_slack`
option.
Returns:
Dataset of (image, label) pairs ready for iteration.
"""
# Defines a specific size thread pool for tf.data operations.
if datasets_num_private_threads:
options = tf.data.Options()
options.experimental_threading.private_threadpool_size = (
datasets_num_private_threads)
dataset = dataset.with_options(options)
logging.info('datasets_num_private_threads: %s',
datasets_num_private_threads)
if is_training:
# Shuffles records before repeating to respect epoch boundaries.
dataset = dataset.shuffle(buffer_size=shuffle_buffer)
# Repeats the dataset for the number of epochs to train.
dataset = dataset.repeat()
# Parses the raw records into images and labels.
dataset = dataset.map(
lambda value: parse_record_fn(value, is_training, dtype),
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
# Operations between the final prefetch and the get_next call to the iterator
# will happen synchronously during run time. We prefetch here again to
# background all of the above processing work and keep it out of the
# critical training path. Setting buffer_size to tf.data.experimental.AUTOTUNE
# allows DistributionStrategies to adjust how many batches to fetch based
# on how many devices are present.
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
options = tf.data.Options()
options.experimental_slack = tf_data_experimental_slack
dataset = dataset.with_options(options)
return dataset
def get_filenames(is_training, data_dir):
"""Return filenames for dataset."""
if is_training:
return [
os.path.join(data_dir, 'train-%05d-of-01024' % i)
for i in range(_NUM_TRAIN_FILES)
]
else:
return [
os.path.join(data_dir, 'validation-%05d-of-00128' % i)
for i in range(128)
]
def parse_example_proto(example_serialized):
"""Parses an Example proto containing a training example of an image.
The output of the build_image_data.py image preprocessing script is a dataset
containing serialized Example protocol buffers. Each Example proto contains
the following fields (values are included as examples):
image/height: 462
image/width: 581
image/colorspace: 'RGB'
image/channels: 3
image/class/label: 615
image/class/synset: 'n03623198'
image/class/text: 'knee pad'
image/object/bbox/xmin: 0.1
image/object/bbox/xmax: 0.9
image/object/bbox/ymin: 0.2
image/object/bbox/ymax: 0.6
image/object/bbox/label: 615
image/format: 'JPEG'
image/filename: 'ILSVRC2012_val_00041207.JPEG'
image/encoded: <JPEG encoded string>
Args:
example_serialized: scalar Tensor tf.string containing a serialized Example
protocol buffer.
Returns:
image_buffer: Tensor tf.string containing the contents of a JPEG file.
label: Tensor tf.int32 containing the label.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
"""
# Dense features in Example proto.
feature_map = {
'image/encoded':
tf.io.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/class/label':
tf.io.FixedLenFeature([], dtype=tf.int64, default_value=-1),
'image/class/text':
tf.io.FixedLenFeature([], dtype=tf.string, default_value=''),
}
sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
# Sparse features in Example proto.
feature_map.update({
k: sparse_float32 for k in [
'image/object/bbox/xmin', 'image/object/bbox/ymin',
'image/object/bbox/xmax', 'image/object/bbox/ymax'
]
})
features = tf.io.parse_single_example(
serialized=example_serialized, features=feature_map)
label = tf.cast(features['image/class/label'], dtype=tf.int32)
xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
  # Note that the coordinates are ordered (y, x) to match the format expected
  # by the TF image ops that consume these boxes.
bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
# Force the variable number of bounding boxes into the shape
# [1, num_boxes, coords].
bbox = tf.expand_dims(bbox, 0)
bbox = tf.transpose(a=bbox, perm=[0, 2, 1])
return features['image/encoded'], label, bbox
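# Editor's note on the bbox layout returned above: for two boxes the tensor
# has shape [1, 2, 4], i.e.
#   [[[ymin0, xmin0, ymax0, xmax0],
#     [ymin1, xmin1, ymax1, xmax1]]]
# which is the [1, num_boxes, coords] format that
# tf.image.sample_distorted_bounding_box expects.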
def parse_record(raw_record, is_training, dtype):
"""Parses a record containing a training example of an image.
The input record is parsed into a label and image, and the image is passed
through preprocessing steps (cropping, flipping, and so on).
Args:
raw_record: scalar Tensor tf.string containing a serialized Example protocol
buffer.
is_training: A boolean denoting whether the input is for training.
dtype: data type to use for images/features.
Returns:
Tuple with processed image tensor in a channel-last format and
one-hot-encoded label tensor.
"""
image_buffer, label, bbox = parse_example_proto(raw_record)
image = preprocess_image(
image_buffer=image_buffer,
bbox=bbox,
output_height=DEFAULT_IMAGE_SIZE,
output_width=DEFAULT_IMAGE_SIZE,
num_channels=NUM_CHANNELS,
is_training=is_training)
image = tf.cast(image, dtype)
# Subtract one so that labels are in [0, 1000), and cast to float32 for
# Keras model.
label = tf.cast(
tf.cast(tf.reshape(label, shape=[1]), dtype=tf.int32) - 1,
dtype=tf.float32)
return image, label
def get_parse_record_fn(use_keras_image_data_format=False):
"""Get a function for parsing the records, accounting for image format.
  This is useful for handling different types of Keras models. For instance,
the current resnet_model.resnet50 input format is always channel-last,
whereas the keras_applications mobilenet input format depends on
tf.keras.backend.image_data_format(). We should set
use_keras_image_data_format=False for the former and True for the latter.
Args:
use_keras_image_data_format: A boolean denoting whether data format is keras
backend image data format. If False, the image format is channel-last. If
True, the image format matches tf.keras.backend.image_data_format().
Returns:
Function to use for parsing the records.
"""
def parse_record_fn(raw_record, is_training, dtype):
image, label = parse_record(raw_record, is_training, dtype)
if use_keras_image_data_format:
if tf.keras.backend.image_data_format() == 'channels_first':
image = tf.transpose(image, perm=[2, 0, 1])
return image, label
return parse_record_fn
def input_fn(is_training,
data_dir,
batch_size,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
input_context=None,
drop_remainder=False,
tf_data_experimental_slack=False,
training_dataset_cache=False,
filenames=None):
"""Input function which provides batches for train or eval.
Args:
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
input_context: A `tf.distribute.InputContext` object passed in by
`tf.distribute.Strategy`.
drop_remainder: A boolean indicates whether to drop the remainder of the
batches. If True, the batch dimension will be static.
tf_data_experimental_slack: Whether to enable tf.data's `experimental_slack`
option.
training_dataset_cache: Whether to cache the training dataset on workers.
Typically used to improve training performance when training data is in
remote storage and can fit into worker memory.
filenames: Optional field for providing the file names of the TFRecords.
Returns:
A dataset that can be used for iteration.
"""
if filenames is None:
filenames = get_filenames(is_training, data_dir)
dataset = tf.data.Dataset.from_tensor_slices(filenames)
if input_context:
logging.info(
'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
input_context.input_pipeline_id, input_context.num_input_pipelines)
dataset = dataset.shard(input_context.num_input_pipelines,
input_context.input_pipeline_id)
if is_training:
# Shuffle the input files
dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)
# Convert to individual records.
# cycle_length = 10 means that up to 10 files will be read and deserialized in
# parallel. You may want to increase this number if you have a large number of
# CPU cores.
dataset = dataset.interleave(
tf.data.TFRecordDataset,
cycle_length=10,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
if is_training and training_dataset_cache:
# Improve training performance when training data is in remote storage and
# can fit into worker memory.
dataset = dataset.cache()
return process_record_dataset(
dataset=dataset,
is_training=is_training,
batch_size=batch_size,
shuffle_buffer=_SHUFFLE_BUFFER,
parse_record_fn=parse_record_fn,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder,
tf_data_experimental_slack=tf_data_experimental_slack,
)
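# Editor's usage sketch (hypothetical data_dir): building the training
# pipeline with the format-aware parser defined above.
#   parse_fn = get_parse_record_fn(use_keras_image_data_format=True)
#   train_ds = input_fn(
#       is_training=True,
#       data_dir='/path/to/imagenet',
#       batch_size=256,
#       parse_record_fn=parse_fn)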
def _decode_crop_and_flip(image_buffer, bbox, num_channels):
"""Crops the given image to a random part of the image, and randomly flips.
We use the fused decode_and_crop op, which performs better than the two ops
used separately in series, but note that this requires that the image be
passed in as an un-decoded string Tensor.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as [ymin,
xmin, ymax, xmax].
num_channels: Integer depth of the image buffer for decoding.
Returns:
3-D tensor with cropped image.
"""
# A large fraction of image datasets contain a human-annotated bounding box
# delineating the region of the image containing the object of interest. We
# choose to create a new bounding box for the object which is a randomly
# distorted version of the human-annotated bounding box that obeys an
# allowed range of aspect ratios, sizes and overlap with the human-annotated
# bounding box. If no box is supplied, then we assume the bounding box is
# the entire image.
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
tf.image.extract_jpeg_shape(image_buffer),
bounding_boxes=bbox,
min_object_covered=0.1,
aspect_ratio_range=[0.75, 1.33],
area_range=[0.05, 1.0],
max_attempts=100,
use_image_if_no_bounding_boxes=True)
bbox_begin, bbox_size, _ = sample_distorted_bounding_box
# Reassemble the bounding box in the format the crop op requires.
offset_y, offset_x, _ = tf.unstack(bbox_begin)
target_height, target_width, _ = tf.unstack(bbox_size)
crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
# Use the fused decode and crop op here, which is faster than each in series.
cropped = tf.image.decode_and_crop_jpeg(
image_buffer, crop_window, channels=num_channels)
# Flip to add a little more random distortion in.
cropped = tf.image.random_flip_left_right(cropped)
return cropped
def _central_crop(image, crop_height, crop_width):
"""Performs central crops of the given image list.
Args:
image: a 3-D image tensor
crop_height: the height of the image following the crop.
crop_width: the width of the image following the crop.
Returns:
3-D tensor with cropped image.
"""
shape = tf.shape(input=image)
height, width = shape[0], shape[1]
amount_to_be_cropped_h = (height - crop_height)
crop_top = amount_to_be_cropped_h // 2
amount_to_be_cropped_w = (width - crop_width)
crop_left = amount_to_be_cropped_w // 2
return tf.slice(image, [crop_top, crop_left, 0],
[crop_height, crop_width, -1])
def _mean_image_subtraction(image, means, num_channels):
"""Subtracts the given means from each image channel.
  For example:
    means = [123.68, 116.779, 103.939]
    image = _mean_image_subtraction(image, means, num_channels=3)
Note that the rank of `image` must be known.
Args:
image: a tensor of size [height, width, C].
means: a C-vector of values to subtract from each channel.
num_channels: number of color channels in the image that will be distorted.
Returns:
the centered image.
Raises:
ValueError: If the rank of `image` is unknown, if `image` has a rank other
than three or if the number of channels in `image` doesn't match the
number of values in `means`.
"""
if image.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
if len(means) != num_channels:
raise ValueError('len(means) must match the number of channels')
# We have a 1-D tensor of means; convert to 3-D.
# Note(b/130245863): we explicitly call `broadcast` instead of simply
# expanding dimensions for better performance.
means = tf.broadcast_to(means, tf.shape(image))
return image - means
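# Editor's sketch: mean subtraction as applied in preprocess_image below.
#   image = tf.ones([224, 224, 3]) * 128.0
#   centered = _mean_image_subtraction(image, CHANNEL_MEANS, NUM_CHANNELS)
#   # centered[..., 0] == 128.0 - 123.68 == 4.32 for the red channel.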
def _smallest_size_at_least(height, width, resize_min):
"""Computes new shape with the smallest side equal to `smallest_side`.
Computes new shape with the smallest side equal to `smallest_side` while
preserving the original aspect ratio.
Args:
height: an int32 scalar tensor indicating the current height.
width: an int32 scalar tensor indicating the current width.
resize_min: A python integer or scalar `Tensor` indicating the size of the
smallest side after resize.
Returns:
new_height: an int32 scalar tensor indicating the new height.
new_width: an int32 scalar tensor indicating the new width.
"""
resize_min = tf.cast(resize_min, tf.float32)
# Convert to floats to make subsequent calculations go smoothly.
height, width = tf.cast(height, tf.float32), tf.cast(width, tf.float32)
smaller_dim = tf.minimum(height, width)
scale_ratio = resize_min / smaller_dim
# Convert back to ints to make heights and widths that TF ops will accept.
new_height = tf.cast(height * scale_ratio, tf.int32)
new_width = tf.cast(width * scale_ratio, tf.int32)
return new_height, new_width
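# Editor's worked example: a 500 x 1000 image with resize_min=256 gives
# scale_ratio = 256 / 500 = 0.512, so the new shape is
# (int(500 * 0.512), int(1000 * 0.512)) == (256, 512), matching the
# _RESIZE_MIN comment near the top of this file.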
def _aspect_preserving_resize(image, resize_min):
"""Resize images preserving the original aspect ratio.
Args:
image: A 3-D image `Tensor`.
resize_min: A python integer or scalar `Tensor` indicating the size of the
smallest side after resize.
Returns:
resized_image: A 3-D tensor containing the resized image.
"""
shape = tf.shape(input=image)
height, width = shape[0], shape[1]
new_height, new_width = _smallest_size_at_least(height, width, resize_min)
return _resize_image(image, new_height, new_width)
def _resize_image(image, height, width):
"""Simple wrapper around tf.resize_images.
This is primarily to make sure we use the same `ResizeMethod` and other
details each time.
Args:
image: A 3-D image `Tensor`.
height: The target height for the resized image.
width: The target width for the resized image.
Returns:
resized_image: A 3-D tensor containing the resized image. The first two
dimensions have the shape [height, width].
"""
return tf.compat.v1.image.resize(
image, [height, width],
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False)
def preprocess_image(image_buffer,
bbox,
output_height,
output_width,
num_channels,
is_training=False):
"""Preprocesses the given image.
Preprocessing includes decoding, cropping, and resizing for both training
and eval images. Training preprocessing, however, introduces some random
distortion of the image to improve accuracy.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as [ymin,
xmin, ymax, xmax].
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
num_channels: Integer depth of the image buffer for decoding.
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
Returns:
A preprocessed image.
"""
if is_training:
# For training, we want to randomize some of the distortions.
image = _decode_crop_and_flip(image_buffer, bbox, num_channels)
image = _resize_image(image, output_height, output_width)
else:
# For validation, we want to decode, resize, then just crop the middle.
image = tf.image.decode_jpeg(image_buffer, channels=num_channels)
image = _aspect_preserving_resize(image, _RESIZE_MIN)
image = _central_crop(image, output_height, output_width)
image.set_shape([output_height, output_width, num_channels])
return _mean_image_subtraction(image, CHANNEL_MEANS, num_channels)
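# Editor's end-to-end sketch of the eval path, assuming `serialized` holds one
# ImageNet Example proto:
#   image_buffer, label, bbox = parse_example_proto(serialized)
#   image = preprocess_image(
#       image_buffer, bbox, DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE,
#       NUM_CHANNELS, is_training=False)
#   # image: mean-subtracted float tensor of shape [224, 224, 3].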
@@ -12,46 +12,45 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common configuration settings."""
# pylint:disable=wildcard-import
import dataclasses
# Lint as: python3
"""Configuration definitions for ResNet losses, learning rates, and optimizers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from official.core.config_definitions import *
import dataclasses
from official.legacy.image_classification.configs import base_configs
from official.modeling.hyperparams import base_config
# TODO(hongkuny): These configs are used in models that are going to deprecate.
# Once those models are removed, we should delete this file to avoid confusion.
# Users should not use this file anymore.
@dataclasses.dataclass
class TensorboardConfig(base_config.Config):
"""Configuration for Tensorboard.
Attributes:
track_lr: Whether or not to track the learning rate in Tensorboard. Defaults
to True.
write_model_weights: Whether or not to write the model weights as images in
Tensorboard. Defaults to False.
"""
track_lr: bool = True
write_model_weights: bool = False
@dataclasses.dataclass
class CallbacksConfig(base_config.Config):
"""Configuration for Callbacks.
Attributes:
enable_checkpoint_and_export: Whether or not to enable checkpoints as a
Callback. Defaults to True.
    enable_backup_and_restore: Whether or not to add the BackupAndRestore
      callback. Defaults to False.
enable_tensorboard: Whether or not to enable Tensorboard as a Callback.
Defaults to True.
enable_time_history: Whether or not to enable TimeHistory Callbacks.
Defaults to True.
"""
enable_checkpoint_and_export: bool = True
enable_backup_and_restore: bool = False
enable_tensorboard: bool = True
enable_time_history: bool = True
@dataclasses.dataclass
class ResNetModelConfig(base_configs.ModelConfig):
"""Configuration for the ResNet model."""
name: str = 'ResNet'
num_classes: int = 1000
model_params: base_config.Config = dataclasses.field(
# pylint: disable=g-long-lambda
default_factory=lambda: {
'num_classes': 1000,
'batch_size': None,
'use_l2_regularizer': True,
'rescale_inputs': False,
})
# pylint: enable=g-long-lambda
loss: base_configs.LossConfig = base_configs.LossConfig(
name='sparse_categorical_crossentropy')
optimizer: base_configs.OptimizerConfig = base_configs.OptimizerConfig(
name='momentum',
decay=0.9,
epsilon=0.001,
momentum=0.9,
moving_average_decay=None)
learning_rate: base_configs.LearningRateConfig = (
base_configs.LearningRateConfig(
name='stepwise',
initial_lr=0.1,
examples_per_epoch=1281167,
boundaries=[30, 60, 80],
warmup_epochs=5,
scale_by_batch_size=1. / 256.,
multipliers=[0.1 / 256, 0.01 / 256, 0.001 / 256, 0.0001 / 256]))
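# Editor's sketch: like the nested configs above, these dataclasses accept
# keyword overrides at construction time, e.g.
#   config = ResNetModelConfig(num_classes=10)
#   config.learning_rate.boundaries  # [30, 60, 80]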
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runs a ResNet model on the ImageNet dataset using custom training loops."""
import math
import os
# Import libraries
from absl import app
from absl import flags
from absl import logging
import orbit
import tensorflow as tf
from official.common import distribute_utils
from official.legacy.image_classification.resnet import common
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.legacy.image_classification.resnet import resnet_runnable
from official.modeling import performance
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
from official.utils.misc import model_helpers
flags.DEFINE_boolean(name='use_tf_function', default=True,
help='Wrap the train and test step inside a '
'tf.function.')
flags.DEFINE_boolean(name='single_l2_loss_op', default=False,
help='Calculate L2_loss on concatenated weights, '
'instead of using Keras per-layer L2 loss.')
def build_stats(runnable, time_callback):
"""Normalizes and returns dictionary of stats.
Args:
    runnable: The `ResnetRunnable` object containing all the training and
      evaluation metrics.
time_callback: Time tracking callback instance.
Returns:
Dictionary of normalized results.
"""
stats = {}
if not runnable.flags_obj.skip_eval:
stats['eval_loss'] = runnable.test_loss.result().numpy()
stats['eval_acc'] = runnable.test_accuracy.result().numpy()
stats['train_loss'] = runnable.train_loss.result().numpy()
stats['train_acc'] = runnable.train_accuracy.result().numpy()
if time_callback:
timestamp_log = time_callback.timestamp_log
stats['step_timestamp_log'] = timestamp_log
stats['train_finish_time'] = time_callback.train_finish_time
if time_callback.epoch_runtime_log:
stats['avg_exp_per_second'] = time_callback.average_examples_per_second
return stats
def get_num_train_iterations(flags_obj):
"""Returns the number of training steps, train and test epochs."""
train_steps = (
imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
train_epochs = flags_obj.train_epochs
if flags_obj.train_steps:
train_steps = min(flags_obj.train_steps, train_steps)
train_epochs = 1
eval_steps = math.ceil(1.0 * imagenet_preprocessing.NUM_IMAGES['validation'] /
flags_obj.batch_size)
return train_steps, train_epochs, eval_steps
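# Editor's worked example: with batch_size=256 and no --train_steps override,
# train_steps = 1281167 // 256 = 5004 steps per epoch and
# eval_steps = ceil(50000 / 256) = 196.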
def run(flags_obj):
"""Run ResNet ImageNet training and eval loop using custom training loops.
Args:
flags_obj: An object containing parsed flag values.
Raises:
ValueError: If fp16 is passed as it is not currently supported.
Returns:
Dictionary of training and eval stats.
"""
keras_utils.set_session_config()
performance.set_mixed_precision_policy(flags_core.get_tf_dtype(flags_obj))
if tf.config.list_physical_devices('GPU'):
if flags_obj.tf_gpu_thread_mode:
keras_utils.set_gpu_thread_mode_and_count(
per_gpu_thread_count=flags_obj.per_gpu_thread_count,
gpu_thread_mode=flags_obj.tf_gpu_thread_mode,
num_gpus=flags_obj.num_gpus,
datasets_num_private_threads=flags_obj.datasets_num_private_threads)
common.set_cudnn_batchnorm_mode()
data_format = flags_obj.data_format
if data_format is None:
data_format = ('channels_first' if tf.config.list_physical_devices('GPU')
else 'channels_last')
tf.keras.backend.set_image_data_format(data_format)
strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=flags_obj.distribution_strategy,
num_gpus=flags_obj.num_gpus,
all_reduce_alg=flags_obj.all_reduce_alg,
num_packs=flags_obj.num_packs,
tpu_address=flags_obj.tpu)
per_epoch_steps, train_epochs, eval_steps = get_num_train_iterations(
flags_obj)
if flags_obj.steps_per_loop is None:
steps_per_loop = per_epoch_steps
elif flags_obj.steps_per_loop > per_epoch_steps:
steps_per_loop = per_epoch_steps
logging.warn('Setting steps_per_loop to %d to respect epoch boundary.',
steps_per_loop)
else:
steps_per_loop = flags_obj.steps_per_loop
logging.info(
'Training %d epochs, each epoch has %d steps, '
'total steps: %d; Eval %d steps', train_epochs, per_epoch_steps,
train_epochs * per_epoch_steps, eval_steps)
time_callback = keras_utils.TimeHistory(
flags_obj.batch_size,
flags_obj.log_steps,
logdir=flags_obj.model_dir if flags_obj.enable_tensorboard else None)
with distribute_utils.get_strategy_scope(strategy):
runnable = resnet_runnable.ResnetRunnable(flags_obj, time_callback,
per_epoch_steps)
eval_interval = flags_obj.epochs_between_evals * per_epoch_steps
checkpoint_interval = (
steps_per_loop * 5 if flags_obj.enable_checkpoint_and_export else None)
summary_interval = steps_per_loop if flags_obj.enable_tensorboard else None
checkpoint_manager = tf.train.CheckpointManager(
runnable.checkpoint,
directory=flags_obj.model_dir,
max_to_keep=10,
step_counter=runnable.global_step,
checkpoint_interval=checkpoint_interval)
resnet_controller = orbit.Controller(
strategy=strategy,
trainer=runnable,
evaluator=runnable if not flags_obj.skip_eval else None,
global_step=runnable.global_step,
steps_per_loop=steps_per_loop,
checkpoint_manager=checkpoint_manager,
summary_interval=summary_interval,
summary_dir=flags_obj.model_dir,
eval_summary_dir=os.path.join(flags_obj.model_dir, 'eval'))
time_callback.on_train_begin()
if not flags_obj.skip_eval:
resnet_controller.train_and_evaluate(
train_steps=per_epoch_steps * train_epochs,
eval_steps=eval_steps,
eval_interval=eval_interval)
else:
resnet_controller.train(steps=per_epoch_steps * train_epochs)
time_callback.on_train_end()
stats = build_stats(runnable, time_callback)
return stats
def main(_):
model_helpers.apply_clean(flags.FLAGS)
stats = run(flags.FLAGS)
logging.info('Run stats:\n%s', stats)
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
common.define_keras_flags()
app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ResNet50 model for Keras.
Adapted from tf.keras.applications.resnet50.ResNet50().
This is ResNet model version 1.5.
Related papers/blogs:
- https://arxiv.org/abs/1512.03385
- https://arxiv.org/pdf/1603.05027v2.pdf
- http://torch.ch/blog/2016/02/04/resnets.html
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from official.legacy.image_classification.resnet import imagenet_preprocessing
layers = tf.keras.layers
def _gen_l2_regularizer(use_l2_regularizer=True, l2_weight_decay=1e-4):
return tf.keras.regularizers.L2(
l2_weight_decay) if use_l2_regularizer else None
def identity_block(input_tensor,
kernel_size,
filters,
stage,
block,
use_l2_regularizer=True,
batch_norm_decay=0.9,
batch_norm_epsilon=1e-5):
"""The identity block is the block that has no conv layer at shortcut.
Args:
input_tensor: input tensor
    kernel_size: default 3, the kernel size of the middle conv layer in the
      main path
    filters: list of integers, the filters of the 3 conv layers in the main
      path
    stage: integer, current stage label, used for generating layer names
    block: 'a','b'..., current block label, used for generating layer names
    use_l2_regularizer: whether to use L2 regularizer on Conv layer.
    batch_norm_decay: Momentum of the batch norm layers.
    batch_norm_epsilon: Epsilon of the batch norm layers.
Returns:
Output tensor for the block.
"""
filters1, filters2, filters3 = filters
if tf.keras.backend.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = layers.Conv2D(
filters1, (1, 1),
use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
name=conv_name_base + '2a')(
input_tensor)
x = layers.BatchNormalization(
axis=bn_axis,
momentum=batch_norm_decay,
epsilon=batch_norm_epsilon,
name=bn_name_base + '2a')(
x)
x = layers.Activation('relu')(x)
x = layers.Conv2D(
filters2,
kernel_size,
padding='same',
use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
name=conv_name_base + '2b')(
x)
x = layers.BatchNormalization(
axis=bn_axis,
momentum=batch_norm_decay,
epsilon=batch_norm_epsilon,
name=bn_name_base + '2b')(
x)
x = layers.Activation('relu')(x)
x = layers.Conv2D(
filters3, (1, 1),
use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
name=conv_name_base + '2c')(
x)
x = layers.BatchNormalization(
axis=bn_axis,
momentum=batch_norm_decay,
epsilon=batch_norm_epsilon,
name=bn_name_base + '2c')(
x)
x = layers.add([x, input_tensor])
x = layers.Activation('relu')(x)
return x
def conv_block(input_tensor,
kernel_size,
filters,
stage,
block,
strides=(2, 2),
use_l2_regularizer=True,
batch_norm_decay=0.9,
batch_norm_epsilon=1e-5):
"""A block that has a conv layer at shortcut.
  Note that from stage 3 onward, the second conv layer in the main path has
  strides=(2, 2), and the shortcut has strides=(2, 2) as well.
Args:
input_tensor: input tensor
    kernel_size: default 3, the kernel size of the middle conv layer in the
      main path
    filters: list of integers, the filters of the 3 conv layers in the main
      path
    stage: integer, current stage label, used for generating layer names
    block: 'a','b'..., current block label, used for generating layer names
    strides: Strides for the second conv layer in the block.
    use_l2_regularizer: whether to use L2 regularizer on Conv layer.
    batch_norm_decay: Momentum of the batch norm layers.
    batch_norm_epsilon: Epsilon of the batch norm layers.
Returns:
Output tensor for the block.
"""
filters1, filters2, filters3 = filters
if tf.keras.backend.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = layers.Conv2D(
filters1, (1, 1),
use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
name=conv_name_base + '2a')(
input_tensor)
x = layers.BatchNormalization(
axis=bn_axis,
momentum=batch_norm_decay,
epsilon=batch_norm_epsilon,
name=bn_name_base + '2a')(
x)
x = layers.Activation('relu')(x)
x = layers.Conv2D(
filters2,
kernel_size,
strides=strides,
padding='same',
use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
name=conv_name_base + '2b')(
x)
x = layers.BatchNormalization(
axis=bn_axis,
momentum=batch_norm_decay,
epsilon=batch_norm_epsilon,
name=bn_name_base + '2b')(
x)
x = layers.Activation('relu')(x)
x = layers.Conv2D(
filters3, (1, 1),
use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
name=conv_name_base + '2c')(
x)
x = layers.BatchNormalization(
axis=bn_axis,
momentum=batch_norm_decay,
epsilon=batch_norm_epsilon,
name=bn_name_base + '2c')(
x)
shortcut = layers.Conv2D(
filters3, (1, 1),
strides=strides,
use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
name=conv_name_base + '1')(
input_tensor)
shortcut = layers.BatchNormalization(
axis=bn_axis,
momentum=batch_norm_decay,
epsilon=batch_norm_epsilon,
name=bn_name_base + '1')(
shortcut)
x = layers.add([x, shortcut])
x = layers.Activation('relu')(x)
return x
def resnet50(num_classes,
batch_size=None,
use_l2_regularizer=True,
rescale_inputs=False,
batch_norm_decay=0.9,
batch_norm_epsilon=1e-5):
"""Instantiates the ResNet50 architecture.
Args:
num_classes: `int` number of classes for image classification.
batch_size: Size of the batches for each step.
use_l2_regularizer: whether to use L2 regularizer on Conv/Dense layer.
    rescale_inputs: whether inputs are in the range [0, 1] and should be
      rescaled to the range expected by the trained model.
    batch_norm_decay: Momentum of the batch norm layers.
    batch_norm_epsilon: Epsilon of the batch norm layers.
Returns:
A Keras model instance.
"""
input_shape = (224, 224, 3)
img_input = layers.Input(shape=input_shape, batch_size=batch_size)
if rescale_inputs:
# Hub image modules expect inputs in the range [0, 1]. This rescales these
# inputs to the range expected by the trained model.
x = layers.Lambda(
lambda x: x * 255.0 - tf.keras.backend.constant( # pylint: disable=g-long-lambda
imagenet_preprocessing.CHANNEL_MEANS,
shape=[1, 1, 3],
dtype=x.dtype),
name='rescale')(
img_input)
else:
x = img_input
if tf.keras.backend.image_data_format() == 'channels_first':
x = layers.Permute((3, 1, 2))(x)
bn_axis = 1
else: # channels_last
bn_axis = 3
block_config = dict(
use_l2_regularizer=use_l2_regularizer,
batch_norm_decay=batch_norm_decay,
batch_norm_epsilon=batch_norm_epsilon)
x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x)
x = layers.Conv2D(
64, (7, 7),
strides=(2, 2),
padding='valid',
use_bias=False,
kernel_initializer='he_normal',
kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
name='conv1')(
x)
x = layers.BatchNormalization(
axis=bn_axis,
momentum=batch_norm_decay,
epsilon=batch_norm_epsilon,
name='bn_conv1')(
x)
x = layers.Activation('relu')(x)
x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
x = conv_block(
x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), **block_config)
x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', **block_config)
x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', **block_config)
x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', **block_config)
x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', **block_config)
x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', **block_config)
x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', **block_config)
x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', **block_config)
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', **block_config)
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', **block_config)
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', **block_config)
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', **block_config)
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', **block_config)
x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', **block_config)
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', **block_config)
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', **block_config)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(
num_classes,
kernel_initializer=tf.initializers.random_normal(stddev=0.01),
kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
bias_regularizer=_gen_l2_regularizer(use_l2_regularizer),
name='fc1000')(
x)
  # A softmax that is followed by the model loss cannot be done in float16
  # due to numeric issues, so we pass dtype='float32'.
x = layers.Activation('softmax', dtype='float32')(x)
# Create model.
return tf.keras.Model(img_input, x, name='resnet50')
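# Editor's usage sketch: building the model as the custom training loop does
# (see resnet_runnable.py), with roughly 25.6M parameters.
#   model = resnet50(num_classes=1001)
#   model.count_params()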
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runs a ResNet model on the ImageNet dataset using custom training loops."""
import orbit
import tensorflow as tf
from official.legacy.image_classification.resnet import common
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.legacy.image_classification.resnet import resnet_model
from official.modeling import grad_utils
from official.modeling import performance
from official.utils.flags import core as flags_core
class ResnetRunnable(orbit.StandardTrainer, orbit.StandardEvaluator):
"""Implements the training and evaluation APIs for Resnet model."""
def __init__(self, flags_obj, time_callback, epoch_steps):
self.strategy = tf.distribute.get_strategy()
self.flags_obj = flags_obj
self.dtype = flags_core.get_tf_dtype(flags_obj)
self.time_callback = time_callback
# Input pipeline related
batch_size = flags_obj.batch_size
if batch_size % self.strategy.num_replicas_in_sync != 0:
raise ValueError(
'Batch size must be divisible by number of replicas : {}'.format(
self.strategy.num_replicas_in_sync))
    # Auto-rebatching is not supported by the
    # `distribute_datasets_from_function()` API, which is required when
    # cloning the dataset to multiple workers in eager mode, so we use the
    # per-replica batch size.
self.batch_size = int(batch_size / self.strategy.num_replicas_in_sync)
if self.flags_obj.use_synthetic_data:
self.input_fn = common.get_synth_input_fn(
height=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
width=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
num_channels=imagenet_preprocessing.NUM_CHANNELS,
num_classes=imagenet_preprocessing.NUM_CLASSES,
dtype=self.dtype,
drop_remainder=True)
else:
self.input_fn = imagenet_preprocessing.input_fn
self.model = resnet_model.resnet50(
num_classes=imagenet_preprocessing.NUM_CLASSES,
use_l2_regularizer=not flags_obj.single_l2_loss_op)
lr_schedule = common.PiecewiseConstantDecayWithWarmup(
batch_size=flags_obj.batch_size,
epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
warmup_epochs=common.LR_SCHEDULE[0][1],
boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
multipliers=list(p[0] for p in common.LR_SCHEDULE),
compute_lr_on_cpu=True)
self.optimizer = common.get_optimizer(lr_schedule)
# Make sure iterations variable is created inside scope.
self.global_step = self.optimizer.iterations
self.optimizer = performance.configure_optimizer(
self.optimizer,
use_float16=self.dtype == tf.float16,
loss_scale=flags_core.get_loss_scale(flags_obj, default_for_fp16=128))
self.train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
'train_accuracy', dtype=tf.float32)
self.test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
'test_accuracy', dtype=tf.float32)
self.checkpoint = tf.train.Checkpoint(
model=self.model, optimizer=self.optimizer)
# Handling epochs.
self.epoch_steps = epoch_steps
self.epoch_helper = orbit.utils.EpochHelper(epoch_steps, self.global_step)
train_dataset = orbit.utils.make_distributed_dataset(
self.strategy,
self.input_fn,
is_training=True,
data_dir=self.flags_obj.data_dir,
batch_size=self.batch_size,
parse_record_fn=imagenet_preprocessing.parse_record,
datasets_num_private_threads=self.flags_obj
.datasets_num_private_threads,
dtype=self.dtype,
drop_remainder=True)
orbit.StandardTrainer.__init__(
self,
train_dataset,
options=orbit.StandardTrainerOptions(
use_tf_while_loop=flags_obj.use_tf_while_loop,
use_tf_function=flags_obj.use_tf_function))
if not flags_obj.skip_eval:
eval_dataset = orbit.utils.make_distributed_dataset(
self.strategy,
self.input_fn,
is_training=False,
data_dir=self.flags_obj.data_dir,
batch_size=self.batch_size,
parse_record_fn=imagenet_preprocessing.parse_record,
dtype=self.dtype)
orbit.StandardEvaluator.__init__(
self,
eval_dataset,
options=orbit.StandardEvaluatorOptions(
use_tf_function=flags_obj.use_tf_function))
def train_loop_begin(self):
"""See base class."""
# Reset all metrics
self.train_loss.reset_states()
self.train_accuracy.reset_states()
self._epoch_begin()
self.time_callback.on_batch_begin(self.epoch_helper.batch_index)
def train_step(self, iterator):
"""See base class."""
def step_fn(inputs):
"""Function to run on the device."""
images, labels = inputs
with tf.GradientTape() as tape:
logits = self.model(images, training=True)
prediction_loss = tf.keras.losses.sparse_categorical_crossentropy(
labels, logits)
loss = tf.reduce_sum(prediction_loss) * (1.0 /
self.flags_obj.batch_size)
num_replicas = self.strategy.num_replicas_in_sync
l2_weight_decay = 1e-4
if self.flags_obj.single_l2_loss_op:
l2_loss = l2_weight_decay * 2 * tf.add_n([
tf.nn.l2_loss(v)
for v in self.model.trainable_variables
if 'bn' not in v.name
])
loss += (l2_loss / num_replicas)
else:
loss += (tf.reduce_sum(self.model.losses) / num_replicas)
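      # Aggregate gradients across replicas with an explicit all-reduce so
      # loss-scale unscaling (for fp16) and gradient aggregation happen in a
      # well-defined order, then apply them without further aggregation.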
grad_utils.minimize_using_explicit_allreduce(
tape, self.optimizer, loss, self.model.trainable_variables)
self.train_loss.update_state(loss)
self.train_accuracy.update_state(labels, logits)
if self.flags_obj.enable_xla:
step_fn = tf.function(step_fn, jit_compile=True)
self.strategy.run(step_fn, args=(next(iterator),))
def train_loop_end(self):
"""See base class."""
metrics = {
'train_loss': self.train_loss.result(),
'train_accuracy': self.train_accuracy.result(),
}
self.time_callback.on_batch_end(self.epoch_helper.batch_index - 1)
self._epoch_end()
return metrics
def eval_begin(self):
"""See base class."""
self.test_loss.reset_states()
self.test_accuracy.reset_states()
def eval_step(self, iterator):
"""See base class."""
def step_fn(inputs):
"""Function to run on the device."""
images, labels = inputs
logits = self.model(images, training=False)
loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits)
loss = tf.reduce_sum(loss) * (1.0 / self.flags_obj.batch_size)
self.test_loss.update_state(loss)
self.test_accuracy.update_state(labels, logits)
self.strategy.run(step_fn, args=(next(iterator),))
def eval_end(self):
"""See base class."""
return {
'test_loss': self.test_loss.result(),
'test_accuracy': self.test_accuracy.result()
}
def _epoch_begin(self):
if self.epoch_helper.epoch_begin():
self.time_callback.on_epoch_begin(self.epoch_helper.current_epoch)
def _epoch_end(self):
if self.epoch_helper.epoch_end():
self.time_callback.on_epoch_end(self.epoch_helper.current_epoch)
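# A minimal driver sketch (illustrative, not part of this file): a class like
# the trainer above, which subclasses both orbit.StandardTrainer and
# orbit.StandardEvaluator, is typically run through orbit.Controller. The
# `train_epochs` flag below is an assumption standing in for the real setup.
import orbit

def run_resnet_trainer(trainer, flags_obj, epoch_steps):
  """Drives the trainer's inner loops with an orbit.Controller."""
  controller = orbit.Controller(
      trainer=trainer,
      evaluator=None if flags_obj.skip_eval else trainer,
      global_step=trainer.global_step,
      steps_per_loop=epoch_steps)
  # One call runs train_loop_begin/train_step/train_loop_end repeatedly.
  controller.train(steps=flags_obj.train_epochs * epoch_steps)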
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A script to export TF-Hub SavedModel."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
# Import libraries
from absl import app
from absl import flags
import tensorflow as tf
from official.legacy.image_classification.resnet import imagenet_preprocessing
from official.legacy.image_classification.resnet import resnet_model
FLAGS = flags.FLAGS
flags.DEFINE_string("model_path", None,
"File path to TF model checkpoint or H5 file.")
flags.DEFINE_string("export_path", None,
"TF-Hub SavedModel destination path to export.")
def export_tfhub(model_path, hub_destination):
"""Restores a tf.keras.Model and saves for TF-Hub."""
model = resnet_model.resnet50(
num_classes=imagenet_preprocessing.NUM_CLASSES, rescale_inputs=True)
model.load_weights(model_path)
model.save(
os.path.join(hub_destination, "classification"), include_optimizer=False)
  # Extract a sub-model that uses the pooled feature vector as its output.
image_input = model.get_layer(index=0).get_output_at(0)
feature_vector_output = model.get_layer(name="reduce_mean").get_output_at(0)
hub_model = tf.keras.Model(image_input, feature_vector_output)
# Exports a SavedModel.
hub_model.save(
os.path.join(hub_destination, "feature-vector"), include_optimizer=False)
def main(argv):
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
export_tfhub(FLAGS.model_path, FLAGS.export_path)
if __name__ == "__main__":
app.run(main)
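# A hedged usage sketch: after exporting, the "feature-vector" SavedModel can
# be consumed through tensorflow_hub. The paths and the Dense head below are
# placeholders, not part of this script.
#
#   python export_tfhub.py --model_path=/tmp/resnet_ckpt --export_path=/tmp/hub
import tensorflow_hub as hub

feature_extractor = hub.KerasLayer("/tmp/hub/feature-vector", trainable=False)
images = tf.keras.layers.Input(shape=(224, 224, 3))
logits = tf.keras.layers.Dense(10)(feature_extractor(images))
fine_tune_model = tf.keras.Model(images, logits)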
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test utilities for image classification tasks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def trivial_model(num_classes):
"""Trivial model for ImageNet dataset."""
input_shape = (224, 224, 3)
img_input = tf.keras.layers.Input(shape=input_shape)
x = tf.keras.layers.Lambda(
lambda x: tf.keras.backend.reshape(x, [-1, 224 * 224 * 3]),
name='reshape')(img_input)
x = tf.keras.layers.Dense(1, name='fc1')(x)
x = tf.keras.layers.Dense(num_classes, name='fc1000')(x)
x = tf.keras.layers.Activation('softmax', dtype='float32')(x)
return tf.keras.models.Model(img_input, x, name='trivial')
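# Illustrative check (not part of the original file): the trivial model maps a
# batch of 224x224x3 images to `num_classes` softmax outputs.
if __name__ == '__main__':
  model = trivial_model(num_classes=1000)
  probs = model(tf.zeros((2, 224, 224, 3)))
  assert probs.shape == (2, 1000)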
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for grad_utils."""
import tensorflow as tf
from official.modeling import grad_utils
from official.modeling import performance
class GradUtilsTest(tf.test.TestCase):
def test_minimize(self):
optimizer = tf.keras.optimizers.SGD(0.1)
with tf.GradientTape() as tape:
model = tf.keras.layers.Dense(2)
outputs = model(tf.zeros((2, 2), tf.float32))
loss = tf.reduce_mean(outputs)
grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
model.trainable_variables)
def test_minimize_fp16(self):
optimizer = performance.configure_optimizer(
tf.keras.optimizers.SGD(0.1), use_float16=True)
performance.set_mixed_precision_policy(tf.float16)
with tf.GradientTape() as tape:
model = tf.keras.layers.Dense(2)
outputs = model(tf.zeros((2, 2), tf.float16))
loss = tf.reduce_mean(outputs)
grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
model.trainable_variables)
# Test other fp16 settings.
def _clip_by_global_norm(grads_and_vars):
grads, tvars = list(zip(*grads_and_vars))
(grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
return zip(grads, tvars)
with tf.GradientTape() as tape:
model = tf.keras.layers.Dense(2)
outputs = model(tf.zeros((2, 2), tf.float16))
loss = tf.reduce_mean(outputs)
optimizer = performance.configure_optimizer(
tf.keras.optimizers.SGD(0.1), use_float16=True, loss_scale=128)
grad_utils.minimize_using_explicit_allreduce(
tape,
optimizer,
loss,
model.trainable_variables,
pre_allreduce_callbacks=[_clip_by_global_norm],
post_allreduce_callbacks=[_clip_by_global_norm])
def test_set_mixed_precision_policy(self):
performance.set_mixed_precision_policy(tf.float16)
performance.set_mixed_precision_policy(tf.bfloat16)
performance.set_mixed_precision_policy(tf.float32)
with self.assertRaises(ValueError):
performance.set_mixed_precision_policy(tf.int32)
if __name__ == '__main__':
tf.test.main()
......@@ -14,14 +14,19 @@
"""Functions and classes related to training performance."""
from absl import logging
import tensorflow as tf
def configure_optimizer(optimizer,
                        use_float16=False,
                        loss_scale=None,
                        use_graph_rewrite=None):
"""Configures optimizer object with performance options."""
if use_graph_rewrite is not None:
logging.warning('`use_graph_rewrite` is deprecated inside '
'`configure_optimizer`. Please remove the usage.')
del use_graph_rewrite
if use_float16:
if loss_scale in (None, 'dynamic'):
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
......@@ -29,13 +34,6 @@ def configure_optimizer(optimizer,
# loss_scale is a number. We interpret that as a fixed loss scale.
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
optimizer, dynamic=False, initial_scale=loss_scale)
return optimizer
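# Hedged examples of the two float16 paths above (not part of this change):
# leaving `loss_scale` at None or 'dynamic' wraps the optimizer in a dynamic
# LossScaleOptimizer, while a number selects a fixed loss scale.
dynamic_opt = configure_optimizer(tf.keras.optimizers.SGD(0.1),
                                  use_float16=True)
fixed_opt = configure_optimizer(tf.keras.optimizers.SGD(0.1),
                                use_float16=True, loss_scale=128)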
......
......@@ -110,6 +110,8 @@ def get_activation(identifier, use_keras_layer=False):
"swish": "swish",
"sigmoid": "sigmoid",
"relu6": tf.nn.relu6,
"hard_swish": activations.hard_swish,
"hard_sigmoid": activations.hard_sigmoid,
}
if identifier in keras_layer_allowlist:
return tf.keras.layers.Activation(keras_layer_allowlist[identifier])
......
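# A hedged example of the allowlist additions above; the module path
# `official.modeling.tf_utils` is an assumption based on this hunk's context.
import tensorflow as tf
from official.modeling import tf_utils

# With `use_keras_layer=True`, string identifiers resolve to
# tf.keras.layers.Activation wrappers around the listed callables.
act = tf_utils.get_activation("hard_swish", use_keras_layer=True)
y = act(tf.constant([-3.0, 0.0, 3.0]))  # hard_swish applied elementwise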
......@@ -44,9 +44,6 @@ READMEs for specific papers.
4. [Transformer for translation](transformer):
[Attention Is All You Need](https://arxiv.org/abs/1706.03762) by Vaswani et
al., 2017
5. [NHNet](nhnet):
[Generating Representative Headlines for News Stories](https://arxiv.org/abs/2001.09386)
   by Gu et al., 2020
### Common Training Driver
......
......@@ -121,9 +121,5 @@ def use_float16():
return flags_core.get_tf_dtype(flags.FLAGS) == tf.float16
def get_loss_scale():
return flags_core.get_loss_scale(flags.FLAGS, default_for_fp16='dynamic')
......@@ -150,8 +150,7 @@ def run_bert_classifier(strategy,
FLAGS.optimizer_type)
classifier_model.optimizer = performance.configure_optimizer(
optimizer,
      use_float16=common_flags.use_float16())
return classifier_model, core_model
# tf.keras.losses objects accept optional sample_weight arguments (e.g. coming
......
......@@ -125,8 +125,7 @@ def run_customized_training(strategy,
end_lr, optimizer_type)
pretrain_model.optimizer = performance.configure_optimizer(
optimizer,
      use_float16=common_flags.use_float16())
return pretrain_model, core_model
trained_model = model_training_utils.run_customized_training_loop(
......
......@@ -252,8 +252,7 @@ def train_squad(strategy,
squad_model.optimizer = performance.configure_optimizer(
optimizer,
      use_float16=common_flags.use_float16())
return squad_model, core_model
# Only when explicit_allreduce = True, post_allreduce_callbacks and
......
......@@ -170,6 +170,32 @@ class KernelEncoderConfig(hyperparams.Config):
scale: Optional[float] = None
@dataclasses.dataclass
class ReuseEncoderConfig(hyperparams.Config):
"""Reuse encoder configuration."""
vocab_size: int = 30522
hidden_size: int = 768
num_layers: int = 12
num_attention_heads: int = 12
hidden_activation: str = "gelu"
intermediate_size: int = 3072
dropout_rate: float = 0.1
attention_dropout_rate: float = 0.1
max_position_embeddings: int = 512
type_vocab_size: int = 2
initializer_range: float = 0.02
embedding_size: Optional[int] = None
output_range: Optional[int] = None
return_all_encoder_outputs: bool = False
# Pre/Post-LN Transformer
norm_first: bool = False
# Reuse transformer
reuse_attention: int = -1
use_relative_pe: bool = False
pe_max_seq_length: int = 512
max_reuse_layer_idx: int = 6
@dataclasses.dataclass
class XLNetEncoderConfig(hyperparams.Config):
"""XLNet encoder configuration."""
......@@ -205,6 +231,7 @@ class EncoderConfig(hyperparams.OneOfConfig):
bigbird: BigBirdEncoderConfig = BigBirdEncoderConfig()
kernel: KernelEncoderConfig = KernelEncoderConfig()
mobilebert: MobileBertEncoderConfig = MobileBertEncoderConfig()
reuse: ReuseEncoderConfig = ReuseEncoderConfig()
teams: BertEncoderConfig = BertEncoderConfig()
xlnet: XLNetEncoderConfig = XLNetEncoderConfig()
......@@ -472,6 +499,43 @@ def build_encoder(config: EncoderConfig,
dict_outputs=True)
return networks.EncoderScaffold(**kwargs)
if encoder_type == "reuse":
embedding_cfg = dict(
vocab_size=encoder_cfg.vocab_size,
type_vocab_size=encoder_cfg.type_vocab_size,
hidden_size=encoder_cfg.hidden_size,
max_seq_length=encoder_cfg.max_position_embeddings,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=encoder_cfg.initializer_range),
dropout_rate=encoder_cfg.dropout_rate)
hidden_cfg = dict(
num_attention_heads=encoder_cfg.num_attention_heads,
inner_dim=encoder_cfg.intermediate_size,
inner_activation=tf_utils.get_activation(
encoder_cfg.hidden_activation),
output_dropout=encoder_cfg.dropout_rate,
attention_dropout=encoder_cfg.attention_dropout_rate,
norm_first=encoder_cfg.norm_first,
kernel_initializer=tf.keras.initializers.TruncatedNormal(
stddev=encoder_cfg.initializer_range),
reuse_attention=encoder_cfg.reuse_attention,
use_relative_pe=encoder_cfg.use_relative_pe,
pe_max_seq_length=encoder_cfg.pe_max_seq_length,
max_reuse_layer_idx=encoder_cfg.max_reuse_layer_idx)
kwargs = dict(
embedding_cfg=embedding_cfg,
hidden_cls=layers.ReuseTransformer,
hidden_cfg=hidden_cfg,
num_hidden_instances=encoder_cfg.num_layers,
pooled_output_dim=encoder_cfg.hidden_size,
pooler_layer_initializer=tf.keras.initializers.TruncatedNormal(
stddev=encoder_cfg.initializer_range),
return_all_layer_outputs=False,
dict_outputs=True,
feed_layer_idx=True,
recursive=True)
return networks.EncoderScaffold(**kwargs)
bert_encoder_cls = networks.BertEncoder
if encoder_type == "bert_v2":
bert_encoder_cls = networks.BertEncoderV2
......
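# A hedged sketch of selecting the new encoder through the one-of config; the
# import path below is an assumption based on this file's location.
from official.nlp.configs import encoders

config = encoders.EncoderConfig(
    type="reuse",
    reuse=encoders.ReuseEncoderConfig(num_layers=6, max_reuse_layer_idx=3))
encoder = encoders.build_encoder(config)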
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Loads dataset for the dual encoder (retrieval) task."""
import functools
import itertools
from typing import Iterable, Mapping, Optional, Tuple
import dataclasses
import tensorflow as tf
import tensorflow_hub as hub
from official.core import config_definitions as cfg
from official.core import input_reader
from official.nlp.data import data_loader
from official.nlp.data import data_loader_factory
from official.nlp.modeling import layers
@dataclasses.dataclass
class DualEncoderDataConfig(cfg.DataConfig):
"""Data config for dual encoder task (tasks/dual_encoder)."""
# Either set `input_path`...
input_path: str = ''
# ...or `tfds_name` and `tfds_split` to specify input.
tfds_name: str = ''
tfds_split: str = ''
global_batch_size: int = 32
# Either build preprocessing with Python code by specifying these values...
vocab_file: str = ''
lower_case: bool = True
# ...or load preprocessing from a SavedModel at this location.
preprocessing_hub_module_url: str = ''
  left_text_fields: Tuple[str, ...] = ('left_input',)
  right_text_fields: Tuple[str, ...] = ('right_input',)
is_training: bool = True
seq_length: int = 128
@data_loader_factory.register_data_loader_cls(DualEncoderDataConfig)
class DualEncoderDataLoader(data_loader.DataLoader):
"""A class to load dataset for dual encoder task (tasks/dual_encoder)."""
def __init__(self, params):
if bool(params.tfds_name) == bool(params.input_path):
raise ValueError('Must specify either `tfds_name` and `tfds_split` '
'or `input_path`.')
if bool(params.vocab_file) == bool(params.preprocessing_hub_module_url):
raise ValueError('Must specify exactly one of vocab_file (with matching '
'lower_case flag) or preprocessing_hub_module_url.')
self._params = params
self._seq_length = params.seq_length
self._left_text_fields = params.left_text_fields
self._right_text_fields = params.right_text_fields
if params.preprocessing_hub_module_url:
preprocessing_hub_module = hub.load(params.preprocessing_hub_module_url)
self._tokenizer = preprocessing_hub_module.tokenize
self._pack_inputs = functools.partial(
preprocessing_hub_module.bert_pack_inputs,
seq_length=params.seq_length)
else:
self._tokenizer = layers.BertTokenizer(
vocab_file=params.vocab_file, lower_case=params.lower_case)
self._pack_inputs = layers.BertPackInputs(
seq_length=params.seq_length,
special_tokens_dict=self._tokenizer.get_special_tokens_dict())
def _decode(self, record: tf.Tensor):
"""Decodes a serialized tf.Example."""
name_to_features = {
x: tf.io.FixedLenFeature([], tf.string)
for x in itertools.chain(
*[self._left_text_fields, self._right_text_fields])
}
example = tf.io.parse_single_example(record, name_to_features)
# tf.Example only supports tf.int64, but the TPU only supports tf.int32.
# So cast all int64 to int32.
for name in example:
t = example[name]
if t.dtype == tf.int64:
t = tf.cast(t, tf.int32)
example[name] = t
return example
def _bert_tokenize(
self, record: Mapping[str, tf.Tensor],
text_fields: Iterable[str]) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
"""Tokenize the input in text_fields using BERT tokenizer.
Args:
record: A tfexample record contains the features.
text_fields: A list of fields to be tokenzied.
Returns:
The tokenized features in a tuple of (input_word_ids, input_mask,
input_type_ids).
"""
segments_text = [record[x] for x in text_fields]
segments_tokens = [self._tokenizer(s) for s in segments_text]
segments = [tf.cast(x.merge_dims(1, 2), tf.int32) for x in segments_tokens]
return self._pack_inputs(segments)
def _bert_preprocess(
self, record: Mapping[str, tf.Tensor]) -> Mapping[str, tf.Tensor]:
"""Perform the bert word piece tokenization for left and right inputs."""
def _switch_prefix(string, old, new):
if string.startswith(old): return new + string[len(old):]
raise ValueError('Expected {} to start with {}'.format(string, old))
def _switch_key_prefix(d, old, new):
return {_switch_prefix(key, old, new): value for key, value in d.items()}
model_inputs = _switch_key_prefix(
self._bert_tokenize(record, self._left_text_fields),
'input_', 'left_')
model_inputs.update(_switch_key_prefix(
self._bert_tokenize(record, self._right_text_fields),
'input_', 'right_'))
return model_inputs
def load(self, input_context: Optional[tf.distribute.InputContext] = None):
"""Returns a tf.dataset.Dataset."""
reader = input_reader.InputReader(
params=self._params,
# Skip `decoder_fn` for tfds input.
decoder_fn=self._decode if self._params.input_path else None,
dataset_fn=tf.data.TFRecordDataset,
postprocess_fn=self._bert_preprocess)
return reader.read(input_context)
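# Minimal usage sketch (paths are placeholders; the loader is normally built
# via data_loader_factory from a registered DualEncoderDataConfig):
#
#   params = DualEncoderDataConfig(
#       input_path='/tmp/train.tf_record',
#       vocab_file='/tmp/vocab.txt',
#       global_batch_size=32)
#   dataset = DualEncoderDataLoader(params).load()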
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for official.nlp.data.dual_encoder_dataloader."""
import os
from absl.testing import parameterized
import tensorflow as tf
from official.nlp.data import dual_encoder_dataloader
_LEFT_FEATURE_NAME = 'left_input'
_RIGHT_FEATURE_NAME = 'right_input'
def _create_fake_dataset(output_path):
"""Creates a fake dataset contains examples for training a dual encoder model.
The created dataset contains examples with two byteslist features keyed by
_LEFT_FEATURE_NAME and _RIGHT_FEATURE_NAME.
Args:
output_path: The output path of the fake dataset.
"""
def create_str_feature(values):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))
with tf.io.TFRecordWriter(output_path) as writer:
for _ in range(100):
features = {}
features[_LEFT_FEATURE_NAME] = create_str_feature([b'hello world.'])
features[_RIGHT_FEATURE_NAME] = create_str_feature([b'world hello.'])
tf_example = tf.train.Example(
features=tf.train.Features(feature=features))
writer.write(tf_example.SerializeToString())
def _make_vocab_file(vocab, output_path):
with tf.io.gfile.GFile(output_path, 'w') as f:
f.write('\n'.join(vocab + ['']))
class DualEncoderDataTest(tf.test.TestCase, parameterized.TestCase):
def test_load_dataset(self):
seq_length = 16
batch_size = 10
train_data_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
vocab_path = os.path.join(self.get_temp_dir(), 'vocab.txt')
_create_fake_dataset(train_data_path)
_make_vocab_file(
['[PAD]', '[UNK]', '[CLS]', '[SEP]', 'he', '#llo', 'world'], vocab_path)
data_config = dual_encoder_dataloader.DualEncoderDataConfig(
input_path=train_data_path,
seq_length=seq_length,
vocab_file=vocab_path,
lower_case=True,
left_text_fields=(_LEFT_FEATURE_NAME,),
right_text_fields=(_RIGHT_FEATURE_NAME,),
global_batch_size=batch_size)
dataset = dual_encoder_dataloader.DualEncoderDataLoader(
data_config).load()
features = next(iter(dataset))
self.assertCountEqual(
['left_word_ids', 'left_mask', 'left_type_ids', 'right_word_ids',
'right_mask', 'right_type_ids'],
features.keys())
self.assertEqual(features['left_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['left_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['left_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['right_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['right_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['right_type_ids'].shape, (batch_size, seq_length))
@parameterized.parameters(False, True)
def test_load_tfds(self, use_preprocessing_hub):
seq_length = 16
batch_size = 10
if use_preprocessing_hub:
vocab_path = ''
preprocessing_hub = (
'https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3')
else:
vocab_path = os.path.join(self.get_temp_dir(), 'vocab.txt')
_make_vocab_file(
['[PAD]', '[UNK]', '[CLS]', '[SEP]', 'he', '#llo', 'world'],
vocab_path)
preprocessing_hub = ''
data_config = dual_encoder_dataloader.DualEncoderDataConfig(
tfds_name='para_crawl/enmt',
tfds_split='train',
seq_length=seq_length,
vocab_file=vocab_path,
lower_case=True,
left_text_fields=('en',),
right_text_fields=('mt',),
preprocessing_hub_module_url=preprocessing_hub,
global_batch_size=batch_size)
dataset = dual_encoder_dataloader.DualEncoderDataLoader(
data_config).load()
features = next(iter(dataset))
self.assertCountEqual(
['left_word_ids', 'left_mask', 'left_type_ids', 'right_word_ids',
'right_mask', 'right_type_ids'],
features.keys())
self.assertEqual(features['left_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['left_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['left_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['right_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['right_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['right_type_ids'].shape, (batch_size, seq_length))
if __name__ == '__main__':
tf.test.main()