Commit a1facb46 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower
Browse files

Add random_crop with boxes and labels and tests

PiperOrigin-RevId: 393039446
parent eceece58
......@@ -555,3 +555,183 @@ def random_horizontal_flip(image, normalized_boxes=None, masks=None, seed=1):
lambda: masks)
return image, normalized_boxes, masks
def random_crop_image_with_boxes_and_labels(img, boxes, labels, min_scale,
                                            aspect_ratio_range,
                                            min_overlap_params, max_retry):
  """Crops a random slice from the input image.

  The function will correspondingly recompute the bounding boxes and filter out
  outside boxes and their labels.

  References:
  [1] End-to-End Object Detection with Transformers
  https://arxiv.org/abs/2005.12872

  The preprocessing steps:
  1. Sample a minimum IoU overlap.
  2. For each trial, sample the new image width, height, and top-left corner.
  3. Compute the IoUs of bounding boxes with the cropped image and retry if
    the maximum IoU is below the sampled threshold.
  4. Find boxes whose centers are in the cropped image.
  5. Compute new bounding boxes in the cropped region and only select those
    boxes' labels.

  Args:
    img: a 'Tensor' of shape [height, width, 3] representing the input image.
    boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding
      boxes with (ymin, xmin, ymax, xmax) in normalized coordinates (the body
      below rescales them by the image size, so they must be in [0, 1]).
    labels: a 'Tensor' of shape [N,] representing the class labels of the
      boxes.
    min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random
      scale variable.
    aspect_ratio_range: a list of two 'float' that specifies the lower and
      upper bound of the random aspect ratio.
    min_overlap_params: a list of four 'float' representing the min value, max
      value, step size, and offset for the minimum overlap sample.
    max_retry: an 'int' representing the number of trials for cropping. If it
      is exhausted, no cropping will be performed.

  Returns:
    img: a Tensor representing the random cropped image. Can be the
      original image if max_retry is exhausted.
    boxes: a Tensor representing the bounding boxes in the cropped image.
    labels: a Tensor representing the new bounding boxes' labels.
  """
  shape = tf.shape(img)
  original_h = shape[0]
  original_w = shape[1]

  # Sample the minimum IoU threshold on a discrete grid:
  # floor(U[minval, maxval) / step) * step - offset.  With the defaults used
  # by `random_crop` this yields values in {-0.1, 0.1, ..., 1.1} before
  # clipping.
  minval, maxval, step, offset = min_overlap_params
  min_overlap = tf.math.floordiv(
      tf.random.uniform([], minval=minval, maxval=maxval), step) * step - offset
  min_overlap = tf.clip_by_value(min_overlap, 0.0, 1.1)

  # A sampled threshold above 1.0 is unsatisfiable by any crop, so this branch
  # deliberately skips cropping for this example.
  if min_overlap > 1.0:
    return img, boxes, labels

  aspect_ratio_low = aspect_ratio_range[0]
  aspect_ratio_high = aspect_ratio_range[1]

  for _ in tf.range(max_retry):
    # Sample crop height and width as independent fractions of the original.
    scale_h = tf.random.uniform([], min_scale, 1.0)
    scale_w = tf.random.uniform([], min_scale, 1.0)
    new_h = tf.cast(
        scale_h * tf.cast(original_h, dtype=tf.float32), dtype=tf.int32)
    new_w = tf.cast(
        scale_w * tf.cast(original_w, dtype=tf.float32), dtype=tf.int32)

    # Aspect ratio has to be in the prespecified range.
    aspect_ratio = new_h / new_w
    if aspect_ratio_low > aspect_ratio or aspect_ratio > aspect_ratio_high:
      continue

    # Sample the top-left corner.  scale_* < 1.0 guarantees new_* < original_*
    # (after the floor cast), so the uniform ranges below are non-empty.
    left = tf.random.uniform([], 0, original_w - new_w, dtype=tf.int32)
    right = left + new_w
    top = tf.random.uniform([], 0, original_h - new_h, dtype=tf.int32)
    bottom = top + new_h

    # Crop window expressed in normalized [0, 1] coordinates.
    normalized_left = tf.cast(
        left, dtype=tf.float32) / tf.cast(
            original_w, dtype=tf.float32)
    normalized_right = tf.cast(
        right, dtype=tf.float32) / tf.cast(
            original_w, dtype=tf.float32)
    normalized_top = tf.cast(
        top, dtype=tf.float32) / tf.cast(
            original_h, dtype=tf.float32)
    normalized_bottom = tf.cast(
        bottom, dtype=tf.float32) / tf.cast(
            original_h, dtype=tf.float32)

    # Crop window as a single (ymin, xmin, ymax, xmax) box of shape [1, 4].
    cropped_box = tf.expand_dims(
        tf.stack([
            normalized_top,
            normalized_left,
            normalized_bottom,
            normalized_right,
        ]),
        axis=0)
    iou = box_ops.bbox_overlap(
        tf.expand_dims(cropped_box, axis=0),
        tf.expand_dims(boxes, axis=0))  # (1, 1, n_ground_truth)
    iou = tf.squeeze(iou, axis=[0, 1])

    # If not a single bounding box has a Jaccard overlap of greater than
    # the minimum, try again.
    if tf.reduce_max(iou) < min_overlap:
      continue

    # centroids[:, 0] is the box center y, centroids[:, 1] the center x.
    centroids = box_ops.yxyx_to_cycxhw(boxes)
    mask = tf.math.logical_and(
        tf.math.logical_and(centroids[:, 0] > normalized_top,
                            centroids[:, 0] < normalized_bottom),
        tf.math.logical_and(centroids[:, 1] > normalized_left,
                            centroids[:, 1] < normalized_right))
    # Only commit the crop when at least one box center falls strictly inside
    # it; otherwise fall through to the next trial.
    if tf.reduce_sum(tf.cast(mask, dtype=tf.int32)) > 0:
      indices = tf.squeeze(tf.where(mask), axis=1)
      filtered_boxes = tf.gather(boxes, indices)
      # Convert surviving boxes to absolute pixels, shift them into the crop's
      # frame, renormalize by the crop size, and clip into [0, 1].
      boxes = tf.clip_by_value(
          (filtered_boxes[..., :] * tf.cast(
              tf.stack([original_h, original_w, original_h, original_w]),
              dtype=tf.float32) -
           tf.cast(tf.stack([top, left, top, left]), dtype=tf.float32)) /
          tf.cast(tf.stack([new_h, new_w, new_h, new_w]), dtype=tf.float32),
          0.0, 1.0)
      img = tf.image.crop_to_bounding_box(img, top, left, bottom - top,
                                          right - left)
      labels = tf.gather(labels, indices)
      break
  return img, boxes, labels
def random_crop(image,
                boxes,
                labels,
                min_scale=0.3,
                aspect_ratio_range=(0.5, 2.0),
                min_overlap_params=(0.0, 1.4, 0.2, 0.1),
                max_retry=50,
                seed=None):
  """Randomly crops the image and boxes, filtering the labels accordingly.

  With probability 0.5 the inputs are returned untouched; otherwise the crop
  is delegated to `random_crop_image_with_boxes_and_labels`.

  Args:
    image: a 'Tensor' of shape [height, width, 3] representing the input image.
    boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding
      boxes with (ymin, xmin, ymax, xmax).
    labels: a 'Tensor' of shape [N,] representing the class labels of the
      boxes.
    min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random
      scale variable.
    aspect_ratio_range: a list of two 'float' that specifies the lower and
      upper bound of the random aspect ratio.
    min_overlap_params: a list of four 'float' representing the min value, max
      value, step size, and offset for the minimum overlap sample.
    max_retry: an 'int' representing the number of trials for cropping. If it
      is exhausted, no cropping will be performed.
    seed: the random number seed of int, but could be None.

  Returns:
    image: a Tensor representing the random cropped image. Can be the
      original image if max_retry is exhausted.
    boxes: a Tensor representing the bounding boxes in the cropped image.
    labels: a Tensor representing the new bounding boxes' labels.
  """
  with tf.name_scope('random_crop'):
    # Flip a fair coin to decide whether this example is cropped at all.
    coin = tf.random.uniform([], seed=seed)
    do_crop = tf.greater(coin, 0.5)
    if do_crop:
      return random_crop_image_with_boxes_and_labels(
          image, boxes, labels, min_scale, aspect_ratio_range,
          min_overlap_params, max_retry)
    else:
      return image, boxes, labels
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for preprocess_ops.py."""
import io
......@@ -42,7 +41,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
([12, 2], 10),
([13, 2, 3], 10),
)
def testPadToFixedSize(self, input_shape, output_size):
def test_pad_to_fixed_size(self, input_shape, output_size):
# Copies input shape to padding shape.
clip_shape = input_shape[:]
clip_shape[0] = min(output_size, clip_shape[0])
......@@ -63,16 +62,11 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
(100, 256, 128, 256, 32, 1.0, 1.0, 128, 256),
(200, 512, 200, 128, 32, 0.25, 0.25, 224, 128),
)
def testResizeAndCropImageRectangluarCase(self,
input_height,
input_width,
desired_height,
desired_width,
stride,
scale_y,
scale_x,
output_height,
output_width):
def test_resize_and_crop_image_rectangluar_case(self, input_height,
input_width, desired_height,
desired_width, stride,
scale_y, scale_x,
output_height, output_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
......@@ -98,16 +92,10 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
(100, 200, 220, 220, 32, 1.1, 1.1, 224, 224),
(512, 512, 1024, 1024, 32, 2.0, 2.0, 1024, 1024),
)
def testResizeAndCropImageSquareCase(self,
input_height,
input_width,
desired_height,
desired_width,
stride,
scale_y,
scale_x,
output_height,
output_width):
def test_resize_and_crop_image_square_case(self, input_height, input_width,
desired_height, desired_width,
stride, scale_y, scale_x,
output_height, output_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
......@@ -135,18 +123,10 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
(100, 200, 80, 100, 32, 0.5, 0.5, 50, 100, 96, 128),
(200, 100, 80, 100, 32, 0.5, 0.5, 100, 50, 128, 96),
)
def testResizeAndCropImageV2(self,
input_height,
input_width,
short_side,
long_side,
stride,
scale_y,
scale_x,
desired_height,
desired_width,
output_height,
output_width):
def test_resize_and_crop_image_v2(self, input_height, input_width, short_side,
long_side, stride, scale_y, scale_x,
desired_height, desired_width,
output_height, output_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
image_shape = tf.shape(image)[0:2]
......@@ -176,9 +156,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(400, 600), (600, 400),
)
def testCenterCropImage(self,
input_height,
input_width):
def test_center_crop_image(self, input_height, input_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
cropped_image = preprocess_ops.center_crop_image(image)
......@@ -188,9 +166,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(400, 600), (600, 400),
)
def testCenterCropImageV2(self,
input_height,
input_width):
def test_center_crop_image_v2(self, input_height, input_width):
image_bytes = tf.constant(
_encode_image(
np.uint8(np.random.rand(input_height, input_width, 3) * 255),
......@@ -204,9 +180,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(400, 600), (600, 400),
)
def testRandomCropImage(self,
input_height,
input_width):
def test_random_crop_image(self, input_height, input_width):
image = tf.convert_to_tensor(
np.random.rand(input_height, input_width, 3))
_ = preprocess_ops.random_crop_image(image)
......@@ -214,9 +188,7 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
(400, 600), (600, 400),
)
def testRandomCropImageV2(self,
input_height,
input_width):
def test_random_crop_image_v2(self, input_height, input_width):
image_bytes = tf.constant(
_encode_image(
np.uint8(np.random.rand(input_height, input_width, 3) * 255),
......@@ -225,6 +197,21 @@ class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
_ = preprocess_ops.random_crop_image_v2(
image_bytes, tf.constant([input_height, input_width, 3], tf.int32))
@parameterized.parameters((640, 640, 20), (1280, 1280, 30))
def test_random_crop(self, input_height, input_width, num_boxes):
image = tf.convert_to_tensor(np.random.rand(input_height, input_width, 3))
boxes_height = np.random.randint(0, input_height, size=(num_boxes, 1))
top = np.random.randint(0, high=(input_height - boxes_height))
down = top + boxes_height
boxes_width = np.random.randint(0, input_width, size=(num_boxes, 1))
left = np.random.randint(0, high=(input_width - boxes_width))
right = left + boxes_width
boxes = tf.constant(
np.concatenate([top, left, down, right], axis=-1), tf.float32)
labels = tf.constant(
np.random.randint(low=0, high=num_boxes, size=(num_boxes,)), tf.int64)
_ = preprocess_ops.random_crop(image, boxes, labels)
# Run all TensorFlow test cases defined in this module when executed directly.
if __name__ == '__main__':
  tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment