Unverified Commit c3ec6948 authored by Philip Meier's avatar Philip Meier Committed by GitHub
Browse files

add cleanup transformation for bounding boxes (#6449)



* add cleanup transformation for bounding boxes

* change the cleanup transform to an actual compose

* address review

* address naming

* use box_area op

* [skip ci] plural

* split compose into its parts and expose them

* cleanup

* more import cleanup

* Update torchvision/prototype/transforms/_misc.py
Co-authored-by: default avatarVasilis Vryniotis <datumbox@users.noreply.github.com>
Co-authored-by: default avatarVasilis Vryniotis <datumbox@users.noreply.github.com>
parent 5bcf9f68
...@@ -35,8 +35,8 @@ from ._geometry import ( ...@@ -35,8 +35,8 @@ from ._geometry import (
ScaleJitter, ScaleJitter,
TenCrop, TenCrop,
) )
from ._meta import ConvertBoundingBoxFormat, ConvertColorSpace, ConvertImageDtype from ._meta import ClampBoundingBoxes, ConvertBoundingBoxFormat, ConvertColorSpace, ConvertImageDtype
from ._misc import GaussianBlur, Identity, Lambda, LinearTransformation, Normalize, ToDtype from ._misc import GaussianBlur, Identity, Lambda, LinearTransformation, Normalize, RemoveSmallBoundingBoxes, ToDtype
from ._type_conversion import DecodeImage, LabelToOneHot, ToImagePIL, ToImageTensor from ._type_conversion import DecodeImage, LabelToOneHot, ToImagePIL, ToImageTensor
from ._deprecated import Grayscale, RandomGrayscale, ToTensor, ToPILImage, PILToTensor # usort: skip from ._deprecated import Grayscale, RandomGrayscale, ToTensor, ToPILImage, PILToTensor # usort: skip
...@@ -66,3 +66,11 @@ class ConvertColorSpace(Transform): ...@@ -66,3 +66,11 @@ class ConvertColorSpace(Transform):
return F.convert_color_space( return F.convert_color_space(
inpt, color_space=self.color_space, old_color_space=self.old_color_space, copy=self.copy inpt, color_space=self.color_space, old_color_space=self.old_color_space, copy=self.copy
) )
class ClampBoundingBoxes(Transform):
    """Clamp bounding box coordinates so they lie within the image bounds."""

    # Only BoundingBox features are touched; all other inputs pass through untouched.
    _transformed_types = (features.BoundingBox,)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        # Clamp using the box's own format and image size, then re-wrap the raw
        # tensor result so the output keeps the BoundingBox metadata of the input.
        clamped = F.clamp_bounding_box(inpt, format=inpt.format, image_size=inpt.image_size)
        return features.BoundingBox.new_like(inpt, clamped)
...@@ -4,8 +4,10 @@ from typing import Any, Callable, Dict, List, Sequence, Type, Union ...@@ -4,8 +4,10 @@ from typing import Any, Callable, Dict, List, Sequence, Type, Union
import PIL.Image import PIL.Image
import torch import torch
from torchvision.ops import remove_small_boxes
from torchvision.prototype import features from torchvision.prototype import features
from torchvision.prototype.transforms import functional as F, Transform from torchvision.prototype.transforms import functional as F, Transform
from torchvision.prototype.transforms._utils import query_bounding_box
from torchvision.transforms.transforms import _setup_size from torchvision.transforms.transforms import _setup_size
...@@ -135,3 +137,28 @@ class ToDtype(Lambda): ...@@ -135,3 +137,28 @@ class ToDtype(Lambda):
def extra_repr(self) -> str: def extra_repr(self) -> str:
return ", ".join([f"dtype={self.dtype}", f"types={[type.__name__ for type in self.types]}"]) return ", ".join([f"dtype={self.dtype}", f"types={[type.__name__ for type in self.types]}"])
class RemoveSmallBoundingBoxes(Transform):
    """Remove bounding boxes smaller than ``min_size`` along either side,
    together with the corresponding entries of associated masks and labels.
    """

    # Boxes determine which indices survive; masks and labels are filtered
    # with the same indices so the sample stays aligned.
    _transformed_types = (features.BoundingBox, features.SegmentationMask, features.Label, features.OneHotLabel)

    def __init__(self, min_size: float = 1.0) -> None:
        super().__init__()
        # Minimum width/height a box must have to be kept.
        self.min_size = min_size

    def _get_params(self, sample: Any) -> Dict[str, Any]:
        boxes = query_bounding_box(sample)

        # TODO: We can improve performance here by not using the `remove_small_boxes` function. It requires the box to
        # be in XYXY format only to calculate the width and height internally. Thus, if the box is in XYWH or CXCYWH
        # format, we need to convert first just to afterwards compute the width and height again, although they were
        # there in the first place for these formats.
        boxes = F.convert_bounding_box_format(
            boxes, old_format=boxes.format, new_format=features.BoundingBoxFormat.XYXY
        )
        keep = remove_small_boxes(boxes, min_size=self.min_size)

        return dict(valid_indices=keep)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        # Index each transformed feature with the surviving box indices and
        # re-wrap it so the feature metadata is preserved.
        return inpt.new_like(inpt, inpt[params["valid_indices"]])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment