Unverified Commit c3ec6948 authored by Philip Meier's avatar Philip Meier Committed by GitHub
Browse files

add cleanup transformation for bounding boxes (#6449)



* add cleanup transformation for bounding boxes

* change the cleanup transform to an actual compose

* address review

* address naming

* use box_area op

* [skip ci] plural

* split compose into its parts and expose them

* cleanup

* more import cleanup

* Update torchvision/prototype/transforms/_misc.py
Co-authored-by: default avatarVasilis Vryniotis <datumbox@users.noreply.github.com>
Co-authored-by: default avatarVasilis Vryniotis <datumbox@users.noreply.github.com>
parent 5bcf9f68
...@@ -35,8 +35,8 @@ from ._geometry import ( ...@@ -35,8 +35,8 @@ from ._geometry import (
ScaleJitter, ScaleJitter,
TenCrop, TenCrop,
) )
from ._meta import ConvertBoundingBoxFormat, ConvertColorSpace, ConvertImageDtype from ._meta import ClampBoundingBoxes, ConvertBoundingBoxFormat, ConvertColorSpace, ConvertImageDtype
from ._misc import GaussianBlur, Identity, Lambda, LinearTransformation, Normalize, ToDtype from ._misc import GaussianBlur, Identity, Lambda, LinearTransformation, Normalize, RemoveSmallBoundingBoxes, ToDtype
from ._type_conversion import DecodeImage, LabelToOneHot, ToImagePIL, ToImageTensor from ._type_conversion import DecodeImage, LabelToOneHot, ToImagePIL, ToImageTensor
from ._deprecated import Grayscale, RandomGrayscale, ToTensor, ToPILImage, PILToTensor # usort: skip from ._deprecated import Grayscale, RandomGrayscale, ToTensor, ToPILImage, PILToTensor # usort: skip
...@@ -66,3 +66,11 @@ class ConvertColorSpace(Transform): ...@@ -66,3 +66,11 @@ class ConvertColorSpace(Transform):
return F.convert_color_space( return F.convert_color_space(
inpt, color_space=self.color_space, old_color_space=self.old_color_space, copy=self.copy inpt, color_space=self.color_space, old_color_space=self.old_color_space, copy=self.copy
) )
class ClampBoundingBoxes(Transform):
    """Clamp bounding box coordinates so they lie within the image bounds."""

    # Only BoundingBox features are touched; all other inputs pass through untouched.
    _transformed_types = (features.BoundingBox,)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        # Clamp using the box's own format and image size, then re-wrap the raw
        # tensor result so the output keeps the BoundingBox metadata of the input.
        clamped = F.clamp_bounding_box(inpt, format=inpt.format, image_size=inpt.image_size)
        return features.BoundingBox.new_like(inpt, clamped)
...@@ -4,8 +4,10 @@ from typing import Any, Callable, Dict, List, Sequence, Type, Union ...@@ -4,8 +4,10 @@ from typing import Any, Callable, Dict, List, Sequence, Type, Union
import PIL.Image import PIL.Image
import torch import torch
from torchvision.ops import remove_small_boxes
from torchvision.prototype import features from torchvision.prototype import features
from torchvision.prototype.transforms import functional as F, Transform from torchvision.prototype.transforms import functional as F, Transform
from torchvision.prototype.transforms._utils import query_bounding_box
from torchvision.transforms.transforms import _setup_size from torchvision.transforms.transforms import _setup_size
...@@ -135,3 +137,28 @@ class ToDtype(Lambda): ...@@ -135,3 +137,28 @@ class ToDtype(Lambda):
def extra_repr(self) -> str: def extra_repr(self) -> str:
return ", ".join([f"dtype={self.dtype}", f"types={[type.__name__ for type in self.types]}"]) return ", ".join([f"dtype={self.dtype}", f"types={[type.__name__ for type in self.types]}"])
class RemoveSmallBoundingBoxes(Transform):
    """Remove bounding boxes smaller than ``min_size`` along either side,
    together with the corresponding entries of associated masks and labels.
    """

    # Boxes determine which indices survive; masks and labels are filtered
    # with the same indices so the sample stays aligned.
    _transformed_types = (features.BoundingBox, features.SegmentationMask, features.Label, features.OneHotLabel)

    def __init__(self, min_size: float = 1.0) -> None:
        super().__init__()
        # Minimum width/height a box must have to be kept.
        self.min_size = min_size

    def _get_params(self, sample: Any) -> Dict[str, Any]:
        boxes = query_bounding_box(sample)

        # TODO: We can improve performance here by not using the `remove_small_boxes` function. It requires the box to
        # be in XYXY format only to calculate the width and height internally. Thus, if the box is in XYWH or CXCYWH
        # format, we need to convert first just to afterwards compute the width and height again, although they were
        # there in the first place for these formats.
        boxes = F.convert_bounding_box_format(
            boxes, old_format=boxes.format, new_format=features.BoundingBoxFormat.XYXY
        )
        keep = remove_small_boxes(boxes, min_size=self.min_size)

        return dict(valid_indices=keep)

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        # Index each transformed feature with the surviving box indices and
        # re-wrap it so the feature metadata is preserved.
        return inpt.new_like(inpt, inpt[params["valid_indices"]])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment