Merged commit includes the following changes: (#7365)

261339941 by haoyuzhang<haoyuzhang@google.com>: Own library functions in Keras ResNet models, and remove dependencies on v1 Estimator version of ResNet models. Most dependencies that the Keras version has are related to data input pipelines. Created dedicated files (cifar_preprocessing.py, imagenet_preprocessing.py) to collect all logic handling Cifar and ImageNet data input function. -- 261339166 by haoyuzhang<haoyuzhang@google.com>: Internal change 261317601 by akuegel<akuegel@google.com>: Internal change 261218818 by A. Unique TensorFlower<gardener@tensorflow.org>: Internal change PiperOrigin-RevId: 261339941

Merged commit includes the following changes: (#7365)
261339941 by haoyuzhang<haoyuzhang@google.com>: Own library functions in Keras ResNet models, and remove dependencies on v1 Estimator version of ResNet models. Most dependencies that the Keras version has are related to data input pipelines. Created dedicated files (cifar_preprocessing.py, imagenet_preprocessing.py) to collect all logic handling Cifar and ImageNet data input function. -- 261339166 by haoyuzhang<haoyuzhang@google.com>: Internal change 261317601 by akuegel<akuegel@google.com>: Internal change 261218818 by A. Unique TensorFlower<gardener@tensorflow.org>: Internal change PiperOrigin-RevId: 261339941
1921a3b5 · Haoyu Zhang · GitHub · 13e7c85d · 1921a3b5 · 1921a3b5
Unverified Commit 1921a3b5 authored Aug 02, 2019 by Haoyu Zhang Committed by GitHub Aug 02, 2019
8 changed files
--- a/official/bert/modeling.py
+++ b/official/bert/modeling.py
@@ -799,9 +799,9 @@ class Transformer(tf.keras.layers.Layer):
              name=("layer_%d" % i)))
    super(Transformer, self).build(unused_input_shapes)

-  def __call__(self, input_tensor, attention_mask=None):
+  def __call__(self, input_tensor, attention_mask=None, **kwargs):
    inputs = pack_inputs([input_tensor, attention_mask])
-    return super(Transformer, self).__call__(inputs=inputs)
+    return super(Transformer, self).__call__(inputs=inputs, **kwargs)

  def call(self, inputs):
    """Implements call() for the layer."""

--- a/official/resnet/keras/cifar_preprocessing.py
+++ b/official/resnet/keras/cifar_preprocessing.py
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Provides utilities to Cifar-10 dataset."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from absl import logging
+import tensorflow as tf
+
+from official.resnet.keras import imagenet_preprocessing
+
+HEIGHT = 32
+WIDTH = 32
+NUM_CHANNELS = 3
+_DEFAULT_IMAGE_BYTES = HEIGHT * WIDTH * NUM_CHANNELS
+# The record is the image plus a one-byte label
+_RECORD_BYTES = _DEFAULT_IMAGE_BYTES + 1
+
+# TODO(tobyboyd): Change to best practice 45K(train)/5K(val)/10K(test) splits.
+NUM_IMAGES = {
+    'train': 50000,
+    'validation': 10000,
+}
+_NUM_DATA_FILES = 5
+NUM_CLASSES = 10
+
+
+def parse_record(raw_record, is_training, dtype):
+  """Parses a record containing a training example of an image.
+
+  The input record is parsed into a label and image, and the image is passed
+  through preprocessing steps (cropping, flipping, and so on).
+
+  This method converts the label to one hot to fit the loss function.
+
+  Args:
+    raw_record: scalar Tensor tf.string containing a serialized
+      Example protocol buffer.
+    is_training: A boolean denoting whether the input is for training.
+    dtype: Data type to use for input images.
+
+  Returns:
+    Tuple with processed image tensor and one-hot-encoded label tensor.
+  """
+  # Convert bytes to a vector of uint8 that is record_bytes long.
+  record_vector = tf.io.decode_raw(raw_record, tf.uint8)
+
+  # The first byte represents the label, which we convert from uint8 to int32
+  # and then to one-hot.
+  label = tf.cast(record_vector[0], tf.int32)
+
+  # The remaining bytes after the label represent the image, which we reshape
+  # from [depth * height * width] to [depth, height, width].
+  depth_major = tf.reshape(record_vector[1:_RECORD_BYTES],
+                           [NUM_CHANNELS, HEIGHT, WIDTH])
+
+  # Convert from [depth, height, width] to [height, width, depth], and cast as
+  # float32.
+  image = tf.cast(tf.transpose(a=depth_major, perm=[1, 2, 0]), tf.float32)
+
+  image = preprocess_image(image, is_training)
+  image = tf.cast(image, dtype)
+
+  # TODO(haoyuzhang,hongkuny,tobyboyd): Remove or replace the use of V1 API
+  label = tf.compat.v1.sparse_to_dense(label, (NUM_CLASSES,), 1)
+  return image, label
+
+
+def preprocess_image(image, is_training):
+  """Preprocess a single image of layout [height, width, depth]."""
+  if is_training:
+    # Resize the image to add four extra pixels on each side.
+    image = tf.image.resize_with_crop_or_pad(
+        image, HEIGHT + 8, WIDTH + 8)
+
+    # Randomly crop a [HEIGHT, WIDTH] section of the image.
+    image = tf.image.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])
+
+    # Randomly flip the image horizontally.
+    image = tf.image.random_flip_left_right(image)
+
+  # Subtract off the mean and divide by the variance of the pixels.
+  image = tf.image.per_image_standardization(image)
+  return image
+
+
+def get_filenames(is_training, data_dir):
+  """Returns a list of filenames."""
+  assert tf.io.gfile.exists(data_dir), (
+      'Run cifar10_download_and_extract.py first to download and extract the '
+      'CIFAR-10 data.')
+
+  if is_training:
+    return [
+        os.path.join(data_dir, 'data_batch_%d.bin' % i)
+        for i in range(1, _NUM_DATA_FILES + 1)
+    ]
+  else:
+    return [os.path.join(data_dir, 'test_batch.bin')]
+
+
+def input_fn(is_training,
+             data_dir,
+             batch_size,
+             num_epochs=1,
+             dtype=tf.float32,
+             datasets_num_private_threads=None,
+             parse_record_fn=parse_record,
+             input_context=None,
+             drop_remainder=False):
+  """Input function which provides batches for train or eval.
+
+  Args:
+    is_training: A boolean denoting whether the input is for training.
+    data_dir: The directory containing the input data.
+    batch_size: The number of samples per batch.
+    num_epochs: The number of epochs to repeat the dataset.
+    dtype: Data type to use for images/features
+    datasets_num_private_threads: Number of private threads for tf.data.
+    parse_record_fn: Function to use for parsing the records.
+    input_context: A `tf.distribute.InputContext` object passed in by
+      `tf.distribute.Strategy`.
+    drop_remainder: A boolean indicates whether to drop the remainder of the
+      batches. If True, the batch dimension will be static.
+
+  Returns:
+    A dataset that can be used for iteration.
+  """
+  filenames = get_filenames(is_training, data_dir)
+  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)
+
+  if input_context:
+    logging.info(
+        'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
+        input_context.input_pipeline_id, input_context.num_input_pipelines)
+    dataset = dataset.shard(input_context.num_input_pipelines,
+                            input_context.input_pipeline_id)
+
+  return imagenet_preprocessing.process_record_dataset(
+      dataset=dataset,
+      is_training=is_training,
+      batch_size=batch_size,
+      shuffle_buffer=NUM_IMAGES['train'],
+      parse_record_fn=parse_record_fn,
+      num_epochs=num_epochs,
+      dtype=dtype,
+      datasets_num_private_threads=datasets_num_private_threads,
+      drop_remainder=drop_remainder
+  )
--- a/official/resnet/keras/imagenet_preprocessing.py
+++ b/official/resnet/keras/imagenet_preprocessing.py
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Provides utilities to preprocess images.
+
+Training images are sampled using the provided bounding boxes, and subsequently
+cropped to the sampled bounding box. Images are additionally flipped randomly,
+then resized to the target output size (without aspect-ratio preservation).
+
+Images used during evaluation are resized (with aspect-ratio preservation) and
+centrally cropped.
+
+All images undergo mean color subtraction.
+
+Note that these steps are colloquially referred to as "ResNet preprocessing,"
+and they differ from "VGG preprocessing," which does not use bounding boxes
+and instead does an aspect-preserving resize followed by random crop during
+training. (These both differ from "Inception preprocessing," which introduces
+color distortion steps.)
+
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from absl import logging
+import tensorflow as tf
+
+DEFAULT_IMAGE_SIZE = 224
+NUM_CHANNELS = 3
+NUM_CLASSES = 1001
+
+NUM_IMAGES = {
+    'train': 1281167,
+    'validation': 50000,
+}
+
+_NUM_TRAIN_FILES = 1024
+_SHUFFLE_BUFFER = 10000
+
+_R_MEAN = 123.68
+_G_MEAN = 116.78
+_B_MEAN = 103.94
+_CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN]
+
+# The lower bound for the smallest side of the image for aspect-preserving
+# resizing. For example, if an image is 500 x 1000, it will be resized to
+# _RESIZE_MIN x (_RESIZE_MIN * 2).
+_RESIZE_MIN = 256
+
+
+def process_record_dataset(dataset,
+                           is_training,
+                           batch_size,
+                           shuffle_buffer,
+                           parse_record_fn,
+                           num_epochs=1,
+                           dtype=tf.float32,
+                           datasets_num_private_threads=None,
+                           drop_remainder=False,
+                           tf_data_experimental_slack=False):
+  """Given a Dataset with raw records, return an iterator over the records.
+
+  Args:
+    dataset: A Dataset representing raw records
+    is_training: A boolean denoting whether the input is for training.
+    batch_size: The number of samples per batch.
+    shuffle_buffer: The buffer size to use when shuffling records. A larger
+      value results in better randomness, but smaller values reduce startup
+      time and use less memory.
+    parse_record_fn: A function that takes a raw record and returns the
+      corresponding (image, label) pair.
+    num_epochs: The number of epochs to repeat the dataset.
+    dtype: Data type to use for images/features.
+    datasets_num_private_threads: Number of threads for a private
+      threadpool created for all datasets computation.
+    drop_remainder: A boolean indicates whether to drop the remainder of the
+      batches. If True, the batch dimension will be static.
+    tf_data_experimental_slack: Whether to enable tf.data's
+      `experimental_slack` option.
+
+  Returns:
+    Dataset of (image, label) pairs ready for iteration.
+  """
+  # Defines a specific size thread pool for tf.data operations.
+  if datasets_num_private_threads:
+    options = tf.data.Options()
+    options.experimental_threading.private_threadpool_size = (
+        datasets_num_private_threads)
+    dataset = dataset.with_options(options)
+    logging.info(
+        'datasets_num_private_threads: %s', datasets_num_private_threads)
+
+  # Disable intra-op parallelism to optimize for throughput instead of latency.
+  options = tf.data.Options()
+  options.experimental_threading.max_intra_op_parallelism = 1
+  dataset = dataset.with_options(options)
+
+  # Prefetches a batch at a time to smooth out the time taken to load input
+  # files for shuffling and processing.
+  dataset = dataset.prefetch(buffer_size=batch_size)
+  if is_training:
+    # Shuffles records before repeating to respect epoch boundaries.
+    dataset = dataset.shuffle(buffer_size=shuffle_buffer)
+
+  # Repeats the dataset for the number of epochs to train.
+  dataset = dataset.repeat(num_epochs)
+
+  # Parses the raw records into images and labels.
+  dataset = dataset.map(
+      lambda value: parse_record_fn(value, is_training, dtype),
+      num_parallel_calls=tf.data.experimental.AUTOTUNE)
+  dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
+
+  # Operations between the final prefetch and the get_next call to the iterator
+  # will happen synchronously during run time. We prefetch here again to
+  # background all of the above processing work and keep it out of the
+  # critical training path. Setting buffer_size to tf.contrib.data.AUTOTUNE
+  # allows DistributionStrategies to adjust how many batches to fetch based
+  # on how many devices are present.
+  dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
+
+  if tf_data_experimental_slack:
+    options = tf.data.Options()
+    options.experimental_slack = True
+    dataset = dataset.with_options(options)
+
+  return dataset
+
+
+def get_filenames(is_training, data_dir):
+  """Return filenames for dataset."""
+  if is_training:
+    return [
+        os.path.join(data_dir, 'train-%05d-of-01024' % i)
+        for i in range(_NUM_TRAIN_FILES)]
+  else:
+    return [
+        os.path.join(data_dir, 'validation-%05d-of-00128' % i)
+        for i in range(128)]
+
+
+def _parse_example_proto(example_serialized):
+  """Parses an Example proto containing a training example of an image.
+
+  The output of the build_image_data.py image preprocessing script is a dataset
+  containing serialized Example protocol buffers. Each Example proto contains
+  the following fields (values are included as examples):
+
+    image/height: 462
+    image/width: 581
+    image/colorspace: 'RGB'
+    image/channels: 3
+    image/class/label: 615
+    image/class/synset: 'n03623198'
+    image/class/text: 'knee pad'
+    image/object/bbox/xmin: 0.1
+    image/object/bbox/xmax: 0.9
+    image/object/bbox/ymin: 0.2
+    image/object/bbox/ymax: 0.6
+    image/object/bbox/label: 615
+    image/format: 'JPEG'
+    image/filename: 'ILSVRC2012_val_00041207.JPEG'
+    image/encoded: <JPEG encoded string>
+
+  Args:
+    example_serialized: scalar Tensor tf.string containing a serialized
+      Example protocol buffer.
+
+  Returns:
+    image_buffer: Tensor tf.string containing the contents of a JPEG file.
+    label: Tensor tf.int32 containing the label.
+    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
+      where each coordinate is [0, 1) and the coordinates are arranged as
+      [ymin, xmin, ymax, xmax].
+  """
+  # Dense features in Example proto.
+  feature_map = {
+      'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string,
+                                             default_value=''),
+      'image/class/label': tf.io.FixedLenFeature([], dtype=tf.int64,
+                                                 default_value=-1),
+      'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string,
+                                                default_value=''),
+  }
+  sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
+  # Sparse features in Example proto.
+  feature_map.update(
+      {k: sparse_float32 for k in [
+          'image/object/bbox/xmin', 'image/object/bbox/ymin',
+          'image/object/bbox/xmax', 'image/object/bbox/ymax']})
+
+  features = tf.io.parse_single_example(serialized=example_serialized,
+                                        features=feature_map)
+  label = tf.cast(features['image/class/label'], dtype=tf.int32)
+
+  xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
+  ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
+  xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
+  ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
+
+  # Note that we impose an ordering of (y, x) just to make life difficult.
+  bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
+
+  # Force the variable number of bounding boxes into the shape
+  # [1, num_boxes, coords].
+  bbox = tf.expand_dims(bbox, 0)
+  bbox = tf.transpose(a=bbox, perm=[0, 2, 1])
+
+  return features['image/encoded'], label, bbox
+
+
+def parse_record(raw_record, is_training, dtype):
+  """Parses a record containing a training example of an image.
+
+  The input record is parsed into a label and image, and the image is passed
+  through preprocessing steps (cropping, flipping, and so on).
+
+  Args:
+    raw_record: scalar Tensor tf.string containing a serialized
+      Example protocol buffer.
+    is_training: A boolean denoting whether the input is for training.
+    dtype: data type to use for images/features.
+
+  Returns:
+    Tuple with processed image tensor and one-hot-encoded label tensor.
+  """
+  image_buffer, label, bbox = _parse_example_proto(raw_record)
+
+  image = preprocess_image(
+      image_buffer=image_buffer,
+      bbox=bbox,
+      output_height=DEFAULT_IMAGE_SIZE,
+      output_width=DEFAULT_IMAGE_SIZE,
+      num_channels=NUM_CHANNELS,
+      is_training=is_training)
+  image = tf.cast(image, dtype)
+
+  # Subtract one so that labels are in [0, 1000), and cast to float32 for
+  # Keras model.
+  label = tf.cast(tf.cast(tf.reshape(label, shape=[1]), dtype=tf.int32) - 1,
+                  dtype=tf.float32)
+  return image, label
+
+
+def input_fn(is_training,
+             data_dir,
+             batch_size,
+             num_epochs=1,
+             dtype=tf.float32,
+             datasets_num_private_threads=None,
+             parse_record_fn=parse_record,
+             input_context=None,
+             drop_remainder=False,
+             tf_data_experimental_slack=False):
+  """Input function which provides batches for train or eval.
+
+  Args:
+    is_training: A boolean denoting whether the input is for training.
+    data_dir: The directory containing the input data.
+    batch_size: The number of samples per batch.
+    num_epochs: The number of epochs to repeat the dataset.
+    dtype: Data type to use for images/features
+    datasets_num_private_threads: Number of private threads for tf.data.
+    parse_record_fn: Function to use for parsing the records.
+    input_context: A `tf.distribute.InputContext` object passed in by
+      `tf.distribute.Strategy`.
+    drop_remainder: A boolean indicates whether to drop the remainder of the
+      batches. If True, the batch dimension will be static.
+    tf_data_experimental_slack: Whether to enable tf.data's
+      `experimental_slack` option.
+
+  Returns:
+    A dataset that can be used for iteration.
+  """
+  filenames = get_filenames(is_training, data_dir)
+  dataset = tf.data.Dataset.from_tensor_slices(filenames)
+
+  if input_context:
+    logging.info(
+        'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
+        input_context.input_pipeline_id, input_context.num_input_pipelines)
+    dataset = dataset.shard(input_context.num_input_pipelines,
+                            input_context.input_pipeline_id)
+
+  if is_training:
+    # Shuffle the input files
+    dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)
+
+  # Convert to individual records.
+  # cycle_length = 10 means that up to 10 files will be read and deserialized in
+  # parallel. You may want to increase this number if you have a large number of
+  # CPU cores.
+  dataset = dataset.interleave(
+      tf.data.TFRecordDataset,
+      cycle_length=10,
+      num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+  return process_record_dataset(
+      dataset=dataset,
+      is_training=is_training,
+      batch_size=batch_size,
+      shuffle_buffer=_SHUFFLE_BUFFER,
+      parse_record_fn=parse_record_fn,
+      num_epochs=num_epochs,
+      dtype=dtype,
+      datasets_num_private_threads=datasets_num_private_threads,
+      drop_remainder=drop_remainder,
+      tf_data_experimental_slack=tf_data_experimental_slack,
+  )
+
+
+def _decode_crop_and_flip(image_buffer, bbox, num_channels):
+  """Crops the given image to a random part of the image, and randomly flips.
+
+  We use the fused decode_and_crop op, which performs better than the two ops
+  used separately in series, but note that this requires that the image be
+  passed in as an un-decoded string Tensor.
+
+  Args:
+    image_buffer: scalar string Tensor representing the raw JPEG image buffer.
+    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
+      where each coordinate is [0, 1) and the coordinates are arranged as
+      [ymin, xmin, ymax, xmax].
+    num_channels: Integer depth of the image buffer for decoding.
+
+  Returns:
+    3-D tensor with cropped image.
+
+  """
+  # A large fraction of image datasets contain a human-annotated bounding box
+  # delineating the region of the image containing the object of interest.  We
+  # choose to create a new bounding box for the object which is a randomly
+  # distorted version of the human-annotated bounding box that obeys an
+  # allowed range of aspect ratios, sizes and overlap with the human-annotated
+  # bounding box. If no box is supplied, then we assume the bounding box is
+  # the entire image.
+  sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
+      tf.image.extract_jpeg_shape(image_buffer),
+      bounding_boxes=bbox,
+      min_object_covered=0.1,
+      aspect_ratio_range=[0.75, 1.33],
+      area_range=[0.05, 1.0],
+      max_attempts=100,
+      use_image_if_no_bounding_boxes=True)
+  bbox_begin, bbox_size, _ = sample_distorted_bounding_box
+
+  # Reassemble the bounding box in the format the crop op requires.
+  offset_y, offset_x, _ = tf.unstack(bbox_begin)
+  target_height, target_width, _ = tf.unstack(bbox_size)
+  crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
+
+  # Use the fused decode and crop op here, which is faster than each in series.
+  cropped = tf.image.decode_and_crop_jpeg(
+      image_buffer, crop_window, channels=num_channels)
+
+  # Flip to add a little more random distortion in.
+  cropped = tf.image.random_flip_left_right(cropped)
+  return cropped
+
+
+def _central_crop(image, crop_height, crop_width):
+  """Performs central crops of the given image list.
+
+  Args:
+    image: a 3-D image tensor
+    crop_height: the height of the image following the crop.
+    crop_width: the width of the image following the crop.
+
+  Returns:
+    3-D tensor with cropped image.
+  """
+  shape = tf.shape(input=image)
+  height, width = shape[0], shape[1]
+
+  amount_to_be_cropped_h = (height - crop_height)
+  crop_top = amount_to_be_cropped_h // 2
+  amount_to_be_cropped_w = (width - crop_width)
+  crop_left = amount_to_be_cropped_w // 2
+  return tf.slice(
+      image, [crop_top, crop_left, 0], [crop_height, crop_width, -1])
+
+
+def _mean_image_subtraction(image, means, num_channels):
+  """Subtracts the given means from each image channel.
+
+  For example:
+    means = [123.68, 116.779, 103.939]
+    image = _mean_image_subtraction(image, means)
+
+  Note that the rank of `image` must be known.
+
+  Args:
+    image: a tensor of size [height, width, C].
+    means: a C-vector of values to subtract from each channel.
+    num_channels: number of color channels in the image that will be distorted.
+
+  Returns:
+    the centered image.
+
+  Raises:
+    ValueError: If the rank of `image` is unknown, if `image` has a rank other
+      than three or if the number of channels in `image` doesn't match the
+      number of values in `means`.
+  """
+  if image.get_shape().ndims != 3:
+    raise ValueError('Input must be of size [height, width, C>0]')
+
+  if len(means) != num_channels:
+    raise ValueError('len(means) must match the number of channels')
+
+  # We have a 1-D tensor of means; convert to 3-D.
+  # Note(b/130245863): we explicitly call `broadcast` instead of simply
+  # expanding dimensions for better performance.
+  means = tf.broadcast_to(means, tf.shape(image))
+
+  return image - means
+
+
+def _smallest_size_at_least(height, width, resize_min):
+  """Computes new shape with the smallest side equal to `smallest_side`.
+
+  Computes new shape with the smallest side equal to `smallest_side` while
+  preserving the original aspect ratio.
+
+  Args:
+    height: an int32 scalar tensor indicating the current height.
+    width: an int32 scalar tensor indicating the current width.
+    resize_min: A python integer or scalar `Tensor` indicating the size of
+      the smallest side after resize.
+
+  Returns:
+    new_height: an int32 scalar tensor indicating the new height.
+    new_width: an int32 scalar tensor indicating the new width.
+  """
+  resize_min = tf.cast(resize_min, tf.float32)
+
+  # Convert to floats to make subsequent calculations go smoothly.
+  height, width = tf.cast(height, tf.float32), tf.cast(width, tf.float32)
+
+  smaller_dim = tf.minimum(height, width)
+  scale_ratio = resize_min / smaller_dim
+
+  # Convert back to ints to make heights and widths that TF ops will accept.
+  new_height = tf.cast(height * scale_ratio, tf.int32)
+  new_width = tf.cast(width * scale_ratio, tf.int32)
+
+  return new_height, new_width
+
+
+def _aspect_preserving_resize(image, resize_min):
+  """Resize images preserving the original aspect ratio.
+
+  Args:
+    image: A 3-D image `Tensor`.
+    resize_min: A python integer or scalar `Tensor` indicating the size of
+      the smallest side after resize.
+
+  Returns:
+    resized_image: A 3-D tensor containing the resized image.
+  """
+  shape = tf.shape(input=image)
+  height, width = shape[0], shape[1]
+
+  new_height, new_width = _smallest_size_at_least(height, width, resize_min)
+
+  return _resize_image(image, new_height, new_width)
+
+
+def _resize_image(image, height, width):
+  """Simple wrapper around tf.resize_images.
+
+  This is primarily to make sure we use the same `ResizeMethod` and other
+  details each time.
+
+  Args:
+    image: A 3-D image `Tensor`.
+    height: The target height for the resized image.
+    width: The target width for the resized image.
+
+  Returns:
+    resized_image: A 3-D tensor containing the resized image. The first two
+      dimensions have the shape [height, width].
+  """
+  return tf.compat.v1.image.resize(
+      image, [height, width], method=tf.image.ResizeMethod.BILINEAR,
+      align_corners=False)
+
+
+def preprocess_image(image_buffer, bbox, output_height, output_width,
+                     num_channels, is_training=False):
+  """Preprocesses the given image.
+
+  Preprocessing includes decoding, cropping, and resizing for both training
+  and eval images. Training preprocessing, however, introduces some random
+  distortion of the image to improve accuracy.
+
+  Args:
+    image_buffer: scalar string Tensor representing the raw JPEG image buffer.
+    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
+      where each coordinate is [0, 1) and the coordinates are arranged as
+      [ymin, xmin, ymax, xmax].
+    output_height: The height of the image after preprocessing.
+    output_width: The width of the image after preprocessing.
+    num_channels: Integer depth of the image buffer for decoding.
+    is_training: `True` if we're preprocessing the image for training and
+      `False` otherwise.
+
+  Returns:
+    A preprocessed image.
+  """
+  if is_training:
+    # For training, we want to randomize some of the distortions.
+    image = _decode_crop_and_flip(image_buffer, bbox, num_channels)
+    image = _resize_image(image, output_height, output_width)
+  else:
+    # For validation, we want to decode, resize, then just crop the middle.
+    image = tf.image.decode_jpeg(image_buffer, channels=num_channels)
+    image = _aspect_preserving_resize(image, _RESIZE_MIN)
+    image = _central_crop(image, output_height, output_width)
+
+  image.set_shape([output_height, output_width, num_channels])
+
+  return _mean_image_subtraction(image, _CHANNEL_MEANS, num_channels)
--- a/official/resnet/keras/keras_cifar_benchmark.py
+++ b/official/resnet/keras/keras_cifar_benchmark.py
@@ -288,6 +288,17 @@ class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

+  def benchmark_1_gpu_xla(self):
+    """Test 1 gpu with xla enabled."""
+    self._setup()
+    FLAGS.num_gpus = 1
+    FLAGS.enable_eager = True
+    FLAGS.enable_xla = True
+    FLAGS.distribution_strategy = 'default'
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla')
+    FLAGS.batch_size = 128
+    self._run_and_report_benchmark()
+
  def benchmark_1_gpu_force_v2(self):
    """Test 1 gpu using forced v2 execution path."""
    self._setup()

--- a/official/resnet/keras/keras_cifar_main.py
+++ b/official/resnet/keras/keras_cifar_main.py
@@ -20,9 +20,9 @@ from __future__ import print_function

 from absl import app as absl_app
 from absl import flags
-import tensorflow as tf  # pylint: disable=g-bad-import-order
+import tensorflow as tf

-from official.resnet import cifar10_main as cifar_main
+from official.resnet.keras import cifar_preprocessing
 from official.resnet.keras import keras_common
 from official.resnet.keras import resnet_cifar_model
 from official.utils.flags import core as flags_core
@@ -65,28 +65,6 @@ def learning_rate_schedule(current_epoch,
  return learning_rate


-def parse_record_keras(raw_record, is_training, dtype):
-  """Parses a record containing a training example of an image.
-
-  The input record is parsed into a label and image, and the image is passed
-  through preprocessing steps (cropping, flipping, and so on).
-
-  This method converts the label to one hot to fit the loss function.
-
-  Args:
-    raw_record: scalar Tensor tf.string containing a serialized
-      Example protocol buffer.
-    is_training: A boolean denoting whether the input is for training.
-    dtype: Data type to use for input images.
-
-  Returns:
-    Tuple with processed image tensor and one-hot-encoded label tensor.
-  """
-  image, label = cifar_main.parse_record(raw_record, is_training, dtype)
-  label = tf.compat.v1.sparse_to_dense(label, (cifar_main.NUM_CLASSES,), 1)
-  return image, label
-
-
 def run(flags_obj):
  """Run ResNet Cifar-10 training and eval loop using native Keras APIs.

@@ -141,22 +119,22 @@ def run(flags_obj):
  if flags_obj.use_synthetic_data:
    distribution_utils.set_up_synthetic_data()
    input_fn = keras_common.get_synth_input_fn(
-        height=cifar_main.HEIGHT,
-        width=cifar_main.WIDTH,
-        num_channels=cifar_main.NUM_CHANNELS,
-        num_classes=cifar_main.NUM_CLASSES,
+        height=cifar_preprocessing.HEIGHT,
+        width=cifar_preprocessing.WIDTH,
+        num_channels=cifar_preprocessing.NUM_CHANNELS,
+        num_classes=cifar_preprocessing.NUM_CLASSES,
        dtype=flags_core.get_tf_dtype(flags_obj),
        drop_remainder=True)
  else:
    distribution_utils.undo_set_up_synthetic_data()
-    input_fn = cifar_main.input_fn
+    input_fn = cifar_preprocessing.input_fn

  train_input_dataset = input_fn(
      is_training=True,
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      num_epochs=flags_obj.train_epochs,
-      parse_record_fn=parse_record_keras,
+      parse_record_fn=cifar_preprocessing.parse_record,
      datasets_num_private_threads=flags_obj.datasets_num_private_threads,
      dtype=dtype,
      # Setting drop_remainder to avoid the partial batch logic in normalization
@@ -171,11 +149,11 @@ def run(flags_obj):
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        num_epochs=flags_obj.train_epochs,
-        parse_record_fn=parse_record_keras)
+        parse_record_fn=cifar_preprocessing.parse_record)

  with strategy_scope:
    optimizer = keras_common.get_optimizer()
-    model = resnet_cifar_model.resnet56(classes=cifar_main.NUM_CLASSES)
+    model = resnet_cifar_model.resnet56(classes=cifar_preprocessing.NUM_CLASSES)

    model.compile(
        loss='categorical_crossentropy',
@@ -186,16 +164,16 @@ def run(flags_obj):
        experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)

  callbacks = keras_common.get_callbacks(
-      learning_rate_schedule, cifar_main.NUM_IMAGES['train'])
+      learning_rate_schedule, cifar_preprocessing.NUM_IMAGES['train'])

-  train_steps = cifar_main.NUM_IMAGES['train'] // flags_obj.batch_size
+  train_steps = cifar_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size
  train_epochs = flags_obj.train_epochs

  if flags_obj.train_steps:
    train_steps = min(flags_obj.train_steps, train_steps)
    train_epochs = 1

-  num_eval_steps = (cifar_main.NUM_IMAGES['validation'] //
+  num_eval_steps = (cifar_preprocessing.NUM_IMAGES['validation'] //
                    flags_obj.batch_size)

  validation_data = eval_input_dataset

--- a/official/resnet/keras/keras_cifar_test.py
+++ b/official/resnet/keras/keras_cifar_test.py
@@ -21,7 +21,7 @@ from __future__ import print_function
 from tempfile import mkdtemp
 import tensorflow as tf

-from official.resnet import cifar10_main
+from official.resnet.keras import cifar_preprocessing
 from official.resnet.keras import keras_cifar_main
 from official.resnet.keras import keras_common
 from official.utils.misc import keras_utils
@@ -53,7 +53,7 @@ class KerasCifarTest(googletest.TestCase):

  def setUp(self):
    super(KerasCifarTest, self).setUp()
-    cifar10_main.NUM_IMAGES["validation"] = 4
+    cifar_preprocessing.NUM_IMAGES["validation"] = 4

  def tearDown(self):
    super(KerasCifarTest, self).tearDown()

--- a/official/resnet/keras/keras_imagenet_main.py
+++ b/official/resnet/keras/keras_imagenet_main.py
@@ -20,9 +20,10 @@ from __future__ import print_function

 from absl import app as absl_app
 from absl import flags
+from absl import logging
 import tensorflow as tf  # pylint: disable=g-bad-import-order

-from official.resnet import imagenet_main
+from official.resnet.keras import imagenet_preprocessing
 from official.resnet.keras import keras_common
 from official.resnet.keras import resnet_model
 from official.resnet.keras import trivial_model
@@ -70,17 +71,6 @@ def learning_rate_schedule(current_epoch,
  return learning_rate


-def parse_record_keras(raw_record, is_training, dtype):
-  """Adjust the shape of label."""
-  image, label = imagenet_main.parse_record(raw_record, is_training, dtype)
-
-  # Subtract one so that labels are in [0, 1000), and cast to float32 for
-  # Keras model.
-  label = tf.cast(tf.cast(tf.reshape(label, shape=[1]), dtype=tf.int32) - 1,
-                  dtype=tf.float32)
-  return image, label
-
-
 def run(flags_obj):
  """Run ResNet ImageNet training and eval loop using native Keras APIs.

@@ -138,15 +128,15 @@ def run(flags_obj):
  if flags_obj.use_synthetic_data:
    distribution_utils.set_up_synthetic_data()
    input_fn = keras_common.get_synth_input_fn(
-        height=imagenet_main.DEFAULT_IMAGE_SIZE,
-        width=imagenet_main.DEFAULT_IMAGE_SIZE,
-        num_channels=imagenet_main.NUM_CHANNELS,
-        num_classes=imagenet_main.NUM_CLASSES,
+        height=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
+        width=imagenet_preprocessing.DEFAULT_IMAGE_SIZE,
+        num_channels=imagenet_preprocessing.NUM_CHANNELS,
+        num_classes=imagenet_preprocessing.NUM_CLASSES,
        dtype=dtype,
        drop_remainder=True)
  else:
    distribution_utils.undo_set_up_synthetic_data()
-    input_fn = imagenet_main.input_fn
+    input_fn = imagenet_preprocessing.input_fn

  # When `enable_xla` is True, we always drop the remainder of the batches
  # in the dataset, as XLA-GPU doesn't support dynamic shapes.
@@ -157,7 +147,7 @@ def run(flags_obj):
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      num_epochs=flags_obj.train_epochs,
-      parse_record_fn=parse_record_keras,
+      parse_record_fn=imagenet_preprocessing.parse_record,
      datasets_num_private_threads=flags_obj.datasets_num_private_threads,
      dtype=dtype,
      drop_remainder=drop_remainder,
@@ -171,7 +161,7 @@ def run(flags_obj):
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        num_epochs=flags_obj.train_epochs,
-        parse_record_fn=parse_record_keras,
+        parse_record_fn=imagenet_preprocessing.parse_record,
        dtype=dtype,
        drop_remainder=drop_remainder)

@@ -179,7 +169,7 @@ def run(flags_obj):
  if flags_obj.use_tensor_lr:
    lr_schedule = keras_common.PiecewiseConstantDecayWithWarmup(
        batch_size=flags_obj.batch_size,
-        epoch_size=imagenet_main.NUM_IMAGES['train'],
+        epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
        warmup_epochs=LR_SCHEDULE[0][1],
        boundaries=list(p[1] for p in LR_SCHEDULE[1:]),
        multipliers=list(p[0] for p in LR_SCHEDULE),
@@ -195,11 +185,11 @@ def run(flags_obj):
                                                          default_for_fp16=128))

    if flags_obj.use_trivial_model:
-      model = trivial_model.trivial_model(imagenet_main.NUM_CLASSES, dtype)
+      model = trivial_model.trivial_model(
+          imagenet_preprocessing.NUM_CLASSES, dtype)
    else:
      model = resnet_model.resnet50(
-          num_classes=imagenet_main.NUM_CLASSES,
-          dtype=dtype)
+          num_classes=imagenet_preprocessing.NUM_CLASSES, dtype=dtype)

    model.compile(
        loss='sparse_categorical_crossentropy',
@@ -210,17 +200,18 @@ def run(flags_obj):
        experimental_run_tf_function=flags_obj.force_v2_in_keras_compile)

  callbacks = keras_common.get_callbacks(
-      learning_rate_schedule, imagenet_main.NUM_IMAGES['train'])
+      learning_rate_schedule, imagenet_preprocessing.NUM_IMAGES['train'])

-  train_steps = imagenet_main.NUM_IMAGES['train'] // flags_obj.batch_size
+  train_steps = (
+      imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
  train_epochs = flags_obj.train_epochs

  if flags_obj.train_steps:
    train_steps = min(flags_obj.train_steps, train_steps)
    train_epochs = 1

-  num_eval_steps = (imagenet_main.NUM_IMAGES['validation'] //
-                    flags_obj.batch_size)
+  num_eval_steps = (
+      imagenet_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size)

  validation_data = eval_input_dataset
  if flags_obj.skip_eval:
@@ -271,10 +262,10 @@ def main(_):
  model_helpers.apply_clean(flags.FLAGS)
  with logger.benchmark_context(flags.FLAGS):
    stats = run(flags.FLAGS)
-  tf.compat.v1.logging.info('Run stats:\n%s' % stats)
+  logging.info('Run stats:\n%s', stats)


 if __name__ == '__main__':
-  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
+  logging.set_verbosity(logging.INFO)
  define_imagenet_keras_flags()
  absl_app.run(main)
--- a/official/resnet/keras/keras_imagenet_test.py
+++ b/official/resnet/keras/keras_imagenet_test.py
@@ -21,7 +21,7 @@ from __future__ import print_function
 from tempfile import mkdtemp
 import tensorflow as tf

-from official.resnet import imagenet_main
+from official.resnet.keras import imagenet_preprocessing
 from official.resnet.keras import keras_imagenet_main
 from official.utils.misc import keras_utils
 from official.utils.testing import integration
@@ -52,7 +52,7 @@ class KerasImagenetTest(googletest.TestCase):

  def setUp(self):
    super(KerasImagenetTest, self).setUp()
-    imagenet_main.NUM_IMAGES["validation"] = 4
+    imagenet_preprocessing.NUM_IMAGES["validation"] = 4

  def tearDown(self):
    super(KerasImagenetTest, self).tearDown()