Commit 64f6527c authored by A. Unique TensorFlower, committed by TF Object Detection Team
Browse files

Enable deterministic center cropping/padding in object detection preprocessing.

PiperOrigin-RevId: 371929331
parent 8f58f396
...@@ -1776,6 +1776,7 @@ def random_pad_image(image, ...@@ -1776,6 +1776,7 @@ def random_pad_image(image,
min_image_size=None, min_image_size=None,
max_image_size=None, max_image_size=None,
pad_color=None, pad_color=None,
center_pad=False,
seed=None, seed=None,
preprocess_vars_cache=None): preprocess_vars_cache=None):
"""Randomly pads the image. """Randomly pads the image.
...@@ -1814,6 +1815,8 @@ def random_pad_image(image, ...@@ -1814,6 +1815,8 @@ def random_pad_image(image,
pad_color: padding color. A rank 1 tensor of [channels] with dtype= pad_color: padding color. A rank 1 tensor of [channels] with dtype=
tf.float32. if set as None, it will be set to average color of tf.float32. if set as None, it will be set to average color of
the input image. the input image.
center_pad: whether the original image will be padded to the center, or
randomly padded (which is default).
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this performed augmentations. Updated in-place. If this
...@@ -1870,6 +1873,12 @@ def random_pad_image(image, ...@@ -1870,6 +1873,12 @@ def random_pad_image(image,
lambda: _random_integer(0, target_width - image_width, seed), lambda: _random_integer(0, target_width - image_width, seed),
lambda: tf.constant(0, dtype=tf.int32)) lambda: tf.constant(0, dtype=tf.int32))
if center_pad:
offset_height = tf.cast(tf.floor((target_height - image_height) / 2),
tf.int32)
offset_width = tf.cast(tf.floor((target_width - image_width) / 2),
tf.int32)
gen_func = lambda: (target_height, target_width, offset_height, offset_width) gen_func = lambda: (target_height, target_width, offset_height, offset_width)
params = _get_or_create_preprocess_rand_vars( params = _get_or_create_preprocess_rand_vars(
gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE, gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE,
...@@ -2113,7 +2122,7 @@ def random_crop_pad_image(image, ...@@ -2113,7 +2122,7 @@ def random_crop_pad_image(image,
max_padded_size_ratio, max_padded_size_ratio,
dtype=tf.int32) dtype=tf.int32)
padded_image, padded_boxes = random_pad_image( padded_image, padded_boxes = random_pad_image( # pylint: disable=unbalanced-tuple-unpacking
cropped_image, cropped_image,
cropped_boxes, cropped_boxes,
min_image_size=min_image_size, min_image_size=min_image_size,
...@@ -2153,6 +2162,7 @@ def random_crop_to_aspect_ratio(image, ...@@ -2153,6 +2162,7 @@ def random_crop_to_aspect_ratio(image,
aspect_ratio=1.0, aspect_ratio=1.0,
overlap_thresh=0.3, overlap_thresh=0.3,
clip_boxes=True, clip_boxes=True,
center_crop=False,
seed=None, seed=None,
preprocess_vars_cache=None): preprocess_vars_cache=None):
"""Randomly crops an image to the specified aspect ratio. """Randomly crops an image to the specified aspect ratio.
...@@ -2191,6 +2201,7 @@ def random_crop_to_aspect_ratio(image, ...@@ -2191,6 +2201,7 @@ def random_crop_to_aspect_ratio(image,
overlap_thresh: minimum overlap thresh with new cropped overlap_thresh: minimum overlap thresh with new cropped
image to keep the box. image to keep the box.
clip_boxes: whether to clip the boxes to the cropped image. clip_boxes: whether to clip the boxes to the cropped image.
center_crop: whether to take the center crop or a random crop.
seed: random seed. seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this performed augmentations. Updated in-place. If this
...@@ -2247,8 +2258,14 @@ def random_crop_to_aspect_ratio(image, ...@@ -2247,8 +2258,14 @@ def random_crop_to_aspect_ratio(image,
# either offset_height = 0 and offset_width is randomly chosen from # either offset_height = 0 and offset_width is randomly chosen from
# [0, offset_width - target_width), or else offset_width = 0 and # [0, offset_width - target_width), or else offset_width = 0 and
# offset_height is randomly chosen from [0, offset_height - target_height) # offset_height is randomly chosen from [0, offset_height - target_height)
offset_height = _random_integer(0, orig_height - target_height + 1, seed) if center_crop:
offset_width = _random_integer(0, orig_width - target_width + 1, seed) offset_height = tf.cast(tf.math.floor((orig_height - target_height) / 2),
tf.int32)
offset_width = tf.cast(tf.math.floor((orig_width - target_width) / 2),
tf.int32)
else:
offset_height = _random_integer(0, orig_height - target_height + 1, seed)
offset_width = _random_integer(0, orig_width - target_width + 1, seed)
generator_func = lambda: (offset_height, offset_width) generator_func = lambda: (offset_height, offset_width)
offset_height, offset_width = _get_or_create_preprocess_rand_vars( offset_height, offset_width = _get_or_create_preprocess_rand_vars(
...@@ -2979,7 +2996,7 @@ def resize_to_range(image, ...@@ -2979,7 +2996,7 @@ def resize_to_range(image,
'per-channel pad value.') 'per-channel pad value.')
new_image = tf.stack( new_image = tf.stack(
[ [
tf.pad( tf.pad( # pylint: disable=g-complex-comprehension
channels[i], [[0, max_dimension - new_size[0]], channels[i], [[0, max_dimension - new_size[0]],
[0, max_dimension - new_size[1]]], [0, max_dimension - new_size[1]]],
constant_values=per_channel_pad_value[i]) constant_values=per_channel_pad_value[i])
......
...@@ -2194,6 +2194,54 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase): ...@@ -2194,6 +2194,54 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
expected_boxes.flatten()) expected_boxes.flatten())
self.assertAllEqual(distorted_masks_.shape, [1, 200, 200]) self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
def testRunRandomCropToAspectRatioCenterCrop(self):
  """Runs random_crop_to_aspect_ratio with center_crop=True end to end."""

  def compute_fn():
    # Assemble the input tensor dict directly from the test fixtures.
    tensor_dict = {
        fields.InputDataFields.image: self.createColorfulTestImage(),
        fields.InputDataFields.groundtruth_boxes: self.createTestBoxes(),
        fields.InputDataFields.groundtruth_classes: self.createTestLabels(),
        fields.InputDataFields.groundtruth_weights:
            self.createTestGroundtruthWeights(),
        fields.InputDataFields.groundtruth_instance_masks:
            tf.random_uniform([2, 200, 400], dtype=tf.float32),
    }
    arg_map = preprocessor.get_default_func_arg_map(
        include_instance_masks=True)
    options = [(preprocessor.random_crop_to_aspect_ratio, {
        'center_crop': True
    })]
    # Pin the random offset to zero so any crop placement in the output can
    # only come from the deterministic center-crop path.
    with mock.patch.object(preprocessor,
                           '_random_integer') as mock_random_integer:
      mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
      distorted = preprocessor.preprocess(
          tensor_dict,
          options,
          func_arg_map=arg_map)
    return [
        distorted[fields.InputDataFields.image],
        distorted[fields.InputDataFields.groundtruth_boxes],
        distorted[fields.InputDataFields.groundtruth_classes],
    ]

  image_out, boxes_out, labels_out = self.execute_cpu(compute_fn, [])
  expected_boxes = np.array(
      [[0.0, 0.0, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=np.float32)
  self.assertAllEqual(image_out.shape, [1, 200, 200, 3])
  self.assertAllEqual(labels_out, [1, 2])
  self.assertAllClose(boxes_out.flatten(), expected_boxes.flatten())
def testRunRandomCropToAspectRatioWithKeypoints(self): def testRunRandomCropToAspectRatioWithKeypoints(self):
def graph_fn(): def graph_fn():
image = self.createColorfulTestImage() image = self.createColorfulTestImage()
...@@ -2433,6 +2481,51 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase): ...@@ -2433,6 +2481,51 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= ( self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
padded_boxes_[:, 3] - padded_boxes_[:, 1]))) padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomPadImageCenterPad(self):
  """Tests that random_pad_image with center_pad=True pads symmetrically."""

  def graph_fn():
    # Normalize the test image to [0, 1] before padding.
    preprocessing_options = [(preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    })]
    images = self.createColorfulTestImage()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    tensor_dict = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
        fields.InputDataFields.groundtruth_classes: labels,
    }
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    # min_image_size == max_image_size makes the 400x400 target size
    # deterministic; center_pad=True places the original image in the middle.
    preprocessing_options = [(preprocessor.random_pad_image, {
        'center_pad': True,
        'min_image_size': [400, 400],
        'max_image_size': [400, 400],
    })]
    padded_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                 preprocessing_options)
    padded_images = padded_tensor_dict[fields.InputDataFields.image]
    padded_boxes = padded_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    padded_labels = padded_tensor_dict[
        fields.InputDataFields.groundtruth_classes]
    return [padded_images, padded_boxes, padded_labels]

  (padded_images_, padded_boxes_, padded_labels_) = self.execute_cpu(
      graph_fn, [])
  # Expected normalized boxes after centering the original image on the
  # 400x400 canvas.
  expected_boxes = np.array([[0.25, 0.25, 0.625, 1.0],
                             [0.375, 0.5, .625, 1.0]], dtype=np.float32)
  self.assertAllEqual(padded_images_.shape, [1, 400, 400, 3])
  self.assertAllEqual(padded_labels_, [1, 2])
  self.assertAllClose(padded_boxes_.flatten(),
                      expected_boxes.flatten())
@parameterized.parameters( @parameterized.parameters(
{'include_dense_pose': False}, {'include_dense_pose': False},
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment