Commit a1facb46 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower
Browse files

Add random_crop with boxes and labels and tests

PiperOrigin-RevId: 393039446
parent eceece58
......@@ -555,3 +555,183 @@ def random_horizontal_flip(image, normalized_boxes=None, masks=None, seed=1):
lambda: masks)
return image, normalized_boxes, masks
def random_crop_image_with_boxes_and_labels(img, boxes, labels, min_scale,
                                            aspect_ratio_range,
                                            min_overlap_params, max_retry):
  """Crops a random slice from the input image.

  The function will correspondingly recompute the bounding boxes and filter out
  outside boxes and their labels.

  References:
  [1] End-to-End Object Detection with Transformers
  https://arxiv.org/abs/2005.12872

  The preprocessing steps:
  1. Sample a minimum IoU overlap.
  2. For each trial, sample the new image width, height, and top-left corner.
  3. Compute the IoUs of bounding boxes with the cropped image and retry if
    the maximum IoU is below the sampled threshold.
  4. Find boxes whose centers are in the cropped image.
  5. Compute new bounding boxes in the cropped region and only select those
    boxes' labels.

  Args:
    img: a 'Tensor' of shape [height, width, 3] representing the input image.
    boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding
      boxes with (ymin, xmin, ymax, xmax) in normalized coordinates (the body
      below rescales them by the image size, so they must be in [0, 1]).
    labels: a 'Tensor' of shape [N,] representing the class labels of the
      boxes.
    min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random
      scale variable.
    aspect_ratio_range: a list of two 'float' that specifies the lower and
      upper bound of the random aspect ratio.
    min_overlap_params: a list of four 'float' representing the min value, max
      value, step size, and offset for the minimum overlap sample.
    max_retry: an 'int' representing the number of trials for cropping. If it
      is exhausted, no cropping will be performed.

  Returns:
    img: a Tensor representing the random cropped image. Can be the
      original image if max_retry is exhausted.
    boxes: a Tensor representing the bounding boxes in the cropped image.
    labels: a Tensor representing the new bounding boxes' labels.
  """
  shape = tf.shape(img)
  original_h = shape[0]
  original_w = shape[1]

  # Sample the minimum IoU threshold on a discrete grid:
  # floor(U[minval, maxval) / step) * step - offset.  With the defaults used
  # by `random_crop` this yields values in {-0.1, 0.1, ..., 1.1} before
  # clipping.
  minval, maxval, step, offset = min_overlap_params
  min_overlap = tf.math.floordiv(
      tf.random.uniform([], minval=minval, maxval=maxval), step) * step - offset
  min_overlap = tf.clip_by_value(min_overlap, 0.0, 1.1)

  # A sampled threshold above 1.0 is unsatisfiable by any crop, so this branch
  # deliberately skips cropping for this example.
  if min_overlap > 1.0:
    return img, boxes, labels

  aspect_ratio_low = aspect_ratio_range[0]
  aspect_ratio_high = aspect_ratio_range[1]

  for _ in tf.range(max_retry):
    # Sample crop height and width as independent fractions of the original.
    scale_h = tf.random.uniform([], min_scale, 1.0)
    scale_w = tf.random.uniform([], min_scale, 1.0)
    new_h = tf.cast(
        scale_h * tf.cast(original_h, dtype=tf.float32), dtype=tf.int32)
    new_w = tf.cast(
        scale_w * tf.cast(original_w, dtype=tf.float32), dtype=tf.int32)

    # Aspect ratio has to be in the prespecified range.
    aspect_ratio = new_h / new_w
    if aspect_ratio_low > aspect_ratio or aspect_ratio > aspect_ratio_high:
      continue

    # Sample the top-left corner.  scale_* < 1.0 guarantees new_* < original_*
    # (after the floor cast), so the uniform ranges below are non-empty.
    left = tf.random.uniform([], 0, original_w - new_w, dtype=tf.int32)
    right = left + new_w
    top = tf.random.uniform([], 0, original_h - new_h, dtype=tf.int32)
    bottom = top + new_h

    # Crop window expressed in normalized [0, 1] coordinates.
    normalized_left = tf.cast(
        left, dtype=tf.float32) / tf.cast(
            original_w, dtype=tf.float32)
    normalized_right = tf.cast(
        right, dtype=tf.float32) / tf.cast(
            original_w, dtype=tf.float32)
    normalized_top = tf.cast(
        top, dtype=tf.float32) / tf.cast(
            original_h, dtype=tf.float32)
    normalized_bottom = tf.cast(
        bottom, dtype=tf.float32) / tf.cast(
            original_h, dtype=tf.float32)

    # Crop window as a single (ymin, xmin, ymax, xmax) box of shape [1, 4].
    cropped_box = tf.expand_dims(
        tf.stack([
            normalized_top,
            normalized_left,
            normalized_bottom,
            normalized_right,
        ]),
        axis=0)
    iou = box_ops.bbox_overlap(
        tf.expand_dims(cropped_box, axis=0),
        tf.expand_dims(boxes, axis=0))  # (1, 1, n_ground_truth)
    iou = tf.squeeze(iou, axis=[0, 1])

    # If not a single bounding box has a Jaccard overlap of greater than
    # the minimum, try again.
    if tf.reduce_max(iou) < min_overlap:
      continue

    # centroids[:, 0] is the box center y, centroids[:, 1] the center x.
    centroids = box_ops.yxyx_to_cycxhw(boxes)
    mask = tf.math.logical_and(
        tf.math.logical_and(centroids[:, 0] > normalized_top,
                            centroids[:, 0] < normalized_bottom),
        tf.math.logical_and(centroids[:, 1] > normalized_left,
                            centroids[:, 1] < normalized_right))
    # Only commit the crop when at least one box center falls strictly inside
    # it; otherwise fall through to the next trial.
    if tf.reduce_sum(tf.cast(mask, dtype=tf.int32)) > 0:
      indices = tf.squeeze(tf.where(mask), axis=1)
      filtered_boxes = tf.gather(boxes, indices)
      # Convert surviving boxes to absolute pixels, shift them into the crop's
      # frame, renormalize by the crop size, and clip into [0, 1].
      boxes = tf.clip_by_value(
          (filtered_boxes[..., :] * tf.cast(
              tf.stack([original_h, original_w, original_h, original_w]),
              dtype=tf.float32) -
           tf.cast(tf.stack([top, left, top, left]), dtype=tf.float32)) /
          tf.cast(tf.stack([new_h, new_w, new_h, new_w]), dtype=tf.float32),
          0.0, 1.0)
      img = tf.image.crop_to_bounding_box(img, top, left, bottom - top,
                                          right - left)
      labels = tf.gather(labels, indices)
      break
  return img, boxes, labels
def random_crop(image,
                boxes,
                labels,
                min_scale=0.3,
                aspect_ratio_range=(0.5, 2.0),
                min_overlap_params=(0.0, 1.4, 0.2, 0.1),
                max_retry=50,
                seed=None):
  """Randomly crops the image and boxes, filtering the labels accordingly.

  With probability 0.5 the inputs are returned untouched; otherwise the crop
  is delegated to `random_crop_image_with_boxes_and_labels`.

  Args:
    image: a 'Tensor' of shape [height, width, 3] representing the input image.
    boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding
      boxes with (ymin, xmin, ymax, xmax).
    labels: a 'Tensor' of shape [N,] representing the class labels of the
      boxes.
    min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random
      scale variable.
    aspect_ratio_range: a list of two 'float' that specifies the lower and
      upper bound of the random aspect ratio.
    min_overlap_params: a list of four 'float' representing the min value, max
      value, step size, and offset for the minimum overlap sample.
    max_retry: an 'int' representing the number of trials for cropping. If it
      is exhausted, no cropping will be performed.
    seed: the random number seed of int, but could be None.

  Returns:
    image: a Tensor representing the random cropped image. Can be the
      original image if max_retry is exhausted.
    boxes: a Tensor representing the bounding boxes in the cropped image.
    labels: a Tensor representing the new bounding boxes' labels.
  """
  with tf.name_scope('random_crop'):
    # Flip a fair coin to decide whether this example is cropped at all.
    coin = tf.random.uniform([], seed=seed)
    do_crop = tf.greater(coin, 0.5)
    if do_crop:
      return random_crop_image_with_boxes_and_labels(
          image, boxes, labels, min_scale, aspect_ratio_range,
          min_overlap_params, max_retry)
    else:
      return image, boxes, labels
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for preprocess_ops.py."""
import io
......@@ -42,7 +41,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
([12, 2], 10),
([13, 2, 3], 10),
)
def testPadToFixedSize(self, input_shape, output_size):
def test_pad_to_fixed_size(self, input_shape, output_size):
# Copies input shape to padding shape.
clip_shape = input_shape[:]
clip_shape[0] = min(output_size, clip_shape[0])
......@@ -63,16 +62,11 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
(100, 256, 128, 256, 32, 1.0, 1.0, 128, 256),
(200, 512, 200, 128, 32, 0.25, 0.25, 224, 128),
)
def testResizeAndCropImageRectangluarCase(self,
input_height,
input_width,
desired_height,
desired_width,
stride,
scale_y,
scale_x,
output_height,
output_width):
def test_resize_and_crop_image_rectangluar_case(self, input_height,
input_width, desired_height,
desired_width, stride,
scale_y, scale_x,
output_height, output_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
......@@ -98,16 +92,10 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
(100, 200, 220, 220, 32, 1.1, 1.1, 224, 224),
(512, 512, 1024, 1024, 32, 2.0, 2.0, 1024, 1024),
)
def testResizeAndCropImageSquareCase(self,
input_height,
input_width,
desired_height,
desired_width,
stride,
scale_y,
scale_x,
output_height,
output_width):
def test_resize_and_crop_image_square_case(self, input_height, input_width,
desired_height, desired_width,
stride, scale_y, scale_x,
output_height, output_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
......@@ -135,18 +123,10 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
(100, 200, 80, 100, 32, 0.5, 0.5, 50, 100, 96, 128),
(200, 100, 80, 100, 32, 0.5, 0.5, 100, 50, 128, 96),
)
def testResizeAndCropImageV2(self,
input_height,
input_width,
short_side,
long_side,
stride,
scale_y,
scale_x,
desired_height,
desired_width,
output_height,
output_width):
def test_resize_and_crop_image_v2(self, input_height, input_width, short_side,
long_side, stride, scale_y, scale_x,
desired_height, desired_width,
output_height, output_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
image_shape = tf.shape(image)[0:2]
......@@ -176,9 +156,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(400, 600), (600, 400),
)
def testCenterCropImage(self,
input_height,
input_width):
def test_center_crop_image(self, input_height, input_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
cropped_image = preprocess_ops.center_crop_image(image)
......@@ -188,9 +166,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(400, 600), (600, 400),
)
def testCenterCropImageV2(self,
input_height,
input_width):
def test_center_crop_image_v2(self, input_height, input_width):
image_bytes = tf.constant(
_encode_image(
np.uint8(np.random.rand(input_height, input_width, 3) * 255),
......@@ -204,9 +180,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(400, 600), (600, 400),
)
def testRandomCropImage(self,
input_height,
input_width):
def test_random_crop_image(self, input_height, input_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
_ = preprocess_ops.random_crop_image(image)
......@@ -214,9 +188,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(400, 600), (600, 400),
)
def testRandomCropImageV2(self,
input_height,
input_width):
def test_random_crop_image_v2(self, input_height, input_width):
image_bytes = tf.constant(
_encode_image(
np.uint8(np.random.rand(input_height, input_width, 3) * 255),
......@@ -225,6 +197,21 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
_ = preprocess_ops.random_crop_image_v2(
image_bytes, tf.constant([input_height, input_width, 3], tf.int32))
@parameterized.parameters((640, 640, 20), (1280, 1280, 30))
def test_random_crop(self, input_height, input_width, num_boxes):
image = tf.convert_to_tensor(np.random.rand(input_height, input_width, 3))
boxes_height = np.random.randint(0, input_height, size=(num_boxes, 1))
top = np.random.randint(0, high=(input_height - boxes_height))
down = top + boxes_height
boxes_width = np.random.randint(0, input_width, size=(num_boxes, 1))
left = np.random.randint(0, high=(input_width - boxes_width))
right = left + boxes_width
boxes = tf.constant(
np.concatenate([top, left, down, right], axis=-1), tf.float32)
labels = tf.constant(
np.random.randint(low=0, high=num_boxes, size=(num_boxes,)), tf.int64)
_ = preprocess_ops.random_crop(image, boxes, labels)
# Run all TensorFlow test cases defined in this module when executed directly.
if __name__ == '__main__':
  tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment