"tests/git@developer.sourcefind.cn:OpenDAS/apex.git" did not exist on "016c8d4f694f4f3d06bf9ee1d8442e955660fce5"
Unverified Commit 9a30bb66 authored by Karmel Allison's avatar Karmel Allison Committed by GitHub
Browse files

Performance tuning image preprocessing (#3383)

* Refining preprocessing, part 1

* Refinements to preprocessing resulting from multi-GPU tests

* Reviving one-hot labels

* Reviving one-hot labels

* Fixing label shapes

* Adding random flip back in

* Reverting unnecessary linting of test file

* Respond to CR

* Respond to CR

* Respond to CR
parent 4c054148
...@@ -54,37 +54,58 @@ def get_filenames(is_training, data_dir): ...@@ -54,37 +54,58 @@ def get_filenames(is_training, data_dir):
for i in range(128)] for i in range(128)]
def parse_record(raw_record, is_training): def _parse_example_proto(example_serialized):
"""Parse an ImageNet record from `value`.""" """Parses an Example proto containing a training example of an image.
keys_to_features = {
'image/encoded': The dataset contains serialized Example protocol buffers.
tf.FixedLenFeature((), tf.string, default_value=''), The Example proto is expected to contain features named
'image/format': image/encoded (a JPEG-encoded string) and image/class/label (int)
tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/class/label': Args:
tf.FixedLenFeature([], dtype=tf.int64, default_value=-1), example_serialized: scalar Tensor tf.string containing a serialized
'image/class/text': Example protocol buffer.
tf.FixedLenFeature([], dtype=tf.string, default_value=''),
'image/object/bbox/xmin': Returns:
tf.VarLenFeature(dtype=tf.float32), image_buffer: Tensor tf.string containing the contents of a JPEG file.
'image/object/bbox/ymin': label: Tensor tf.int64 containing the label.
tf.VarLenFeature(dtype=tf.float32), """
'image/object/bbox/xmax': # Dense features in Example proto.
tf.VarLenFeature(dtype=tf.float32), feature_map = {
'image/object/bbox/ymax': 'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
tf.VarLenFeature(dtype=tf.float32), default_value=''),
'image/object/class/label': 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
tf.VarLenFeature(dtype=tf.int64), default_value=-1)
} }
parsed = tf.parse_single_example(raw_record, keys_to_features) features = tf.parse_single_example(example_serialized, feature_map)
return features['image/encoded'], features['image/class/label']
image = tf.image.decode_image(
tf.reshape(parsed['image/encoded'], shape=[]),
_NUM_CHANNELS)
# Note that tf.image.convert_image_dtype scales the image data to [0, 1). def parse_record(raw_record, is_training):
image = tf.image.convert_image_dtype(image, dtype=tf.float32) """Parses a record containing a training example of an image.
The input record is parsed into a label and image, and the image is passed
through preprocessing steps (cropping, flipping, and so on).
Args:
raw_record: scalar Tensor tf.string containing a serialized
Example protocol buffer.
is_training: A boolean denoting whether the input is for training.
Returns:
Tuple with processed image tensor and one-hot-encoded label tensor.
"""
image, label = _parse_example_proto(raw_record)
# Decode the string as an RGB JPEG.
# Note that the resulting image contains an unknown height and width
# that is set dynamically by decode_jpeg. In other words, the height
# and width of the image are unknown at compile-time.
# Results in a 3-D uint8 Tensor which we then convert to a float
# with values ranging from [0, 1).
image = tf.image.decode_jpeg(image, channels=_NUM_CHANNELS)
image = tf.image.convert_image_dtype(image, tf.float32)
image = vgg_preprocessing.preprocess_image( image = vgg_preprocessing.preprocess_image(
image=image, image=image,
...@@ -92,11 +113,10 @@ def parse_record(raw_record, is_training): ...@@ -92,11 +113,10 @@ def parse_record(raw_record, is_training):
output_width=_DEFAULT_IMAGE_SIZE, output_width=_DEFAULT_IMAGE_SIZE,
is_training=is_training) is_training=is_training)
label = tf.cast( label = tf.cast(tf.reshape(label, shape=[]), dtype=tf.int32)
tf.reshape(parsed['image/class/label'], shape=[]), label = tf.one_hot(label, _NUM_CLASSES)
dtype=tf.int32)
return image, tf.one_hot(label, _NUM_CLASSES) return image, label
def input_fn(is_training, data_dir, batch_size, num_epochs=1, def input_fn(is_training, data_dir, batch_size, num_epochs=1,
......
...@@ -42,155 +42,62 @@ _RESIZE_SIDE_MIN = 256 ...@@ -42,155 +42,62 @@ _RESIZE_SIDE_MIN = 256
_RESIZE_SIDE_MAX = 512 _RESIZE_SIDE_MAX = 512
def _crop(image, offset_height, offset_width, crop_height, crop_width): def _get_h_w(image):
"""Crops the given image using the provided offsets and sizes. """Convenience for grabbing the height and width of an image.
Note that the method doesn't assume we know the input image size but it does
assume we know the input image rank.
Args:
image: an image of shape [height, width, channels].
offset_height: a scalar tensor indicating the height offset.
offset_width: a scalar tensor indicating the width offset.
crop_height: the height of the cropped image.
crop_width: the width of the cropped image.
Returns:
the cropped (and resized) image.
Raises:
InvalidArgumentError: if the rank is not 3 or if the image dimensions are
less than the crop size.
""" """
original_shape = tf.shape(image) shape = tf.shape(image)
return shape[0], shape[1]
rank_assertion = tf.Assert(
tf.equal(tf.rank(image), 3),
['Rank of image must be equal to 3.'])
with tf.control_dependencies([rank_assertion]):
cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]])
size_assertion = tf.Assert(
tf.logical_and(
tf.greater_equal(original_shape[0], crop_height),
tf.greater_equal(original_shape[1], crop_width)),
['Crop size greater than the image size.'])
offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))
# Use tf.slice instead of crop_to_bounding box as it accepts tensors to
# define the crop size.
with tf.control_dependencies([size_assertion]):
image = tf.slice(image, offsets, cropped_shape)
return tf.reshape(image, cropped_shape)
def _random_crop(image_list, crop_height, crop_width):
"""Crops the given list of images.
The function applies the same crop to each image in the list. This can be
effectively applied when there are multiple image inputs of the same
dimension such as:
image, depths, normals = _random_crop([image, depths, normals], 120, 150) def _random_crop_and_flip(image, crop_height, crop_width):
"""Crops the given image to a random part of the image, and randomly flips.
Args: Args:
image_list: a list of image tensors of the same dimension but possibly image: a 3-D image tensor
varying channel.
crop_height: the new height. crop_height: the new height.
crop_width: the new width. crop_width: the new width.
Returns: Returns:
the image_list with cropped images. 3-D tensor with cropped image.
Raises:
ValueError: if there are multiple image inputs provided with different size
or the images are smaller than the crop dimensions.
""" """
if not image_list: height, width = _get_h_w(image)
raise ValueError('Empty image_list.')
# Compute the rank assertions.
rank_assertions = []
for i in range(len(image_list)):
image_rank = tf.rank(image_list[i])
rank_assert = tf.Assert(
tf.equal(image_rank, 3),
['Wrong rank for tensor %s [expected] [actual]',
image_list[i].name, 3, image_rank])
rank_assertions.append(rank_assert)
with tf.control_dependencies([rank_assertions[0]]):
image_shape = tf.shape(image_list[0])
image_height = image_shape[0]
image_width = image_shape[1]
crop_size_assert = tf.Assert(
tf.logical_and(
tf.greater_equal(image_height, crop_height),
tf.greater_equal(image_width, crop_width)),
['Crop size greater than the image size.'])
asserts = [rank_assertions[0], crop_size_assert]
for i in range(1, len(image_list)):
image = image_list[i]
asserts.append(rank_assertions[i])
with tf.control_dependencies([rank_assertions[i]]):
shape = tf.shape(image)
height = shape[0]
width = shape[1]
height_assert = tf.Assert(
tf.equal(height, image_height),
['Wrong height for tensor %s [expected][actual]',
image.name, height, image_height])
width_assert = tf.Assert(
tf.equal(width, image_width),
['Wrong width for tensor %s [expected][actual]',
image.name, width, image_width])
asserts.extend([height_assert, width_assert])
# Create a random bounding box. # Create a random bounding box.
# #
# Use tf.random_uniform and not numpy.random.rand as doing the former would # Use tf.random_uniform and not numpy.random.rand as doing the former would
# generate random numbers at graph eval time, unlike the latter which # generate random numbers at graph eval time, unlike the latter which
# generates random numbers at graph definition time. # generates random numbers at graph definition time.
with tf.control_dependencies(asserts): total_crop_height = (height - crop_height)
max_offset_height = tf.reshape(image_height - crop_height + 1, []) crop_top = tf.random_uniform([], maxval=total_crop_height + 1, dtype=tf.int32)
with tf.control_dependencies(asserts): total_crop_width = (width - crop_width)
max_offset_width = tf.reshape(image_width - crop_width + 1, []) crop_left = tf.random_uniform([], maxval=total_crop_width + 1, dtype=tf.int32)
offset_height = tf.random_uniform(
[], maxval=max_offset_height, dtype=tf.int32)
offset_width = tf.random_uniform(
[], maxval=max_offset_width, dtype=tf.int32)
return [_crop(image, offset_height, offset_width, cropped = tf.slice(
crop_height, crop_width) for image in image_list] image, [crop_top, crop_left, 0], [crop_height, crop_width, -1])
cropped = tf.image.random_flip_left_right(cropped)
return cropped
def _central_crop(image_list, crop_height, crop_width): def _central_crop(image, crop_height, crop_width):
"""Performs central crops of the given image list. """Performs central crops of the given image list.
Args: Args:
image_list: a list of image tensors of the same dimension but possibly image: a 3-D image tensor
varying channel.
crop_height: the height of the image following the crop. crop_height: the height of the image following the crop.
crop_width: the width of the image following the crop. crop_width: the width of the image following the crop.
Returns: Returns:
the list of cropped images. 3-D tensor with cropped image.
""" """
outputs = [] height, width = _get_h_w(image)
for image in image_list:
image_height = tf.shape(image)[0]
image_width = tf.shape(image)[1]
offset_height = (image_height - crop_height) / 2 total_crop_height = (height - crop_height)
offset_width = (image_width - crop_width) / 2 crop_top = total_crop_height // 2
total_crop_width = (width - crop_width)
outputs.append(_crop(image, offset_height, offset_width, crop_left = total_crop_width // 2
crop_height, crop_width)) return tf.slice(
return outputs image, [crop_top, crop_left, 0], [crop_height, crop_width, -1])
def _mean_image_subtraction(image, means): def _mean_image_subtraction(image, means):
...@@ -220,10 +127,10 @@ def _mean_image_subtraction(image, means): ...@@ -220,10 +127,10 @@ def _mean_image_subtraction(image, means):
if len(means) != num_channels: if len(means) != num_channels:
raise ValueError('len(means) must match the number of channels') raise ValueError('len(means) must match the number of channels')
channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) # We have a 1-D tensor of means; convert to 3-D.
for i in range(num_channels): means = tf.expand_dims(tf.expand_dims(means, 0), 0)
channels[i] -= means[i]
return tf.concat(axis=2, values=channels) return image - means
def _smallest_size_at_least(height, width, smallest_side): def _smallest_size_at_least(height, width, smallest_side):
...@@ -242,17 +149,16 @@ def _smallest_size_at_least(height, width, smallest_side): ...@@ -242,17 +149,16 @@ def _smallest_size_at_least(height, width, smallest_side):
new_height: an int32 scalar tensor indicating the new height. new_height: an int32 scalar tensor indicating the new height.
new_width: an int32 scalar tensor indicating the new width. new_width: an int32 scalar tensor indicating the new width.
""" """
smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) smallest_side = tf.cast(smallest_side, tf.float32)
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
height = tf.to_float(height) smaller_dim = tf.minimum(height, width)
width = tf.to_float(width) scale_ratio = smallest_side / smaller_dim
smallest_side = tf.to_float(smallest_side) new_height = tf.cast(height * scale_ratio, tf.int32)
new_width = tf.cast(width * scale_ratio, tf.int32)
scale = tf.cond(tf.greater(height, width),
lambda: smallest_side / width,
lambda: smallest_side / height)
new_height = tf.to_int32(height * scale)
new_width = tf.to_int32(width * scale)
return new_height, new_width return new_height, new_width
...@@ -269,68 +175,13 @@ def _aspect_preserving_resize(image, smallest_side): ...@@ -269,68 +175,13 @@ def _aspect_preserving_resize(image, smallest_side):
""" """
smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)
shape = tf.shape(image) height, width = _get_h_w(image)
height = shape[0]
width = shape[1]
new_height, new_width = _smallest_size_at_least(height, width, smallest_side) new_height, new_width = _smallest_size_at_least(height, width, smallest_side)
image = tf.expand_dims(image, 0)
resized_image = tf.image.resize_bilinear(image, [new_height, new_width],
align_corners=False)
resized_image = tf.squeeze(resized_image)
resized_image.set_shape([None, None, 3])
return resized_image
def preprocess_for_train(image, resized_image = tf.image.resize_images(
output_height, image, [new_height, new_width], method=tf.image.ResizeMethod.BILINEAR,
output_width, align_corners=False)
resize_side_min=_RESIZE_SIDE_MIN, return resized_image
resize_side_max=_RESIZE_SIDE_MAX):
"""Preprocesses the given image for training.
Note that the actual resizing scale is sampled from
[`resize_side_min`, `resize_side_max`].
Args:
image: A `Tensor` representing an image of arbitrary size.
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
resize_side_min: The lower bound for the smallest side of the image for
aspect-preserving resizing.
resize_side_max: The upper bound for the smallest side of the image for
aspect-preserving resizing.
Returns:
A preprocessed image.
"""
resize_side = tf.random_uniform(
[], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32)
image = _aspect_preserving_resize(image, resize_side)
image = _random_crop([image], output_height, output_width)[0]
image.set_shape([output_height, output_width, 3])
image = tf.to_float(image)
image = tf.image.random_flip_left_right(image)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_for_eval(image, output_height, output_width, resize_side):
"""Preprocesses the given image for evaluation.
Args:
image: A `Tensor` representing an image of arbitrary size.
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
resize_side: The smallest side of the image for aspect-preserving resizing.
Returns:
A preprocessed image.
"""
image = _aspect_preserving_resize(image, resize_side)
image = _central_crop([image], output_height, output_width)[0]
image.set_shape([output_height, output_width, 3])
image = tf.to_float(image)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
def preprocess_image(image, output_height, output_width, is_training=False, def preprocess_image(image, output_height, output_width, is_training=False,
...@@ -356,8 +207,19 @@ def preprocess_image(image, output_height, output_width, is_training=False, ...@@ -356,8 +207,19 @@ def preprocess_image(image, output_height, output_width, is_training=False,
A preprocessed image. A preprocessed image.
""" """
if is_training: if is_training:
return preprocess_for_train(image, output_height, output_width, # For training, we want to randomize some of the distortions.
resize_side_min, resize_side_max) resize_side = tf.random_uniform(
[], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32)
crop_fn = _random_crop_and_flip
else: else:
return preprocess_for_eval(image, output_height, output_width, resize_side = resize_side_min
resize_side_min) crop_fn = _central_crop
num_channels = image.get_shape().as_list()[-1]
image = _aspect_preserving_resize(image, resize_side)
image = crop_fn(image, output_height, output_width)
image.set_shape([output_height, output_width, num_channels])
image = tf.cast(image, tf.float32)
return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment