Unverified Commit 26099237 authored by vfdev, committed by GitHub

[proto] Clean-ups from 6512 (#6520)



* clean up bboxes

* Adding padding_mode in pad_bbox

* Fixing tests and renaming _compute_output_size to appropriate names

Co-authored-by: Vasilis Vryniotis <vvryniotis@fb.com>
parent 4cb83c2f
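
Note (editor): the functional change below is a new `padding_mode` argument on `pad_bounding_box`, which only accepts `"constant"` for now. A rough usage sketch, assuming the prototype namespace at this commit is importable; the import paths and box values are illustrative, not taken from the PR:

```python
import torch
from torchvision.prototype import features
from torchvision.prototype.transforms import functional as F  # assumed location of pad_bounding_box

# One box in XYXY format; values are made up for illustration.
boxes = torch.tensor([[10.0, 20.0, 30.0, 40.0]])

# "constant" padding shifts the box by the left/top padding amounts.
padded = F.pad_bounding_box(boxes, padding=[5, 10], format=features.BoundingBoxFormat.XYXY, padding_mode="constant")

# Any other mode is rejected with the error message added in this PR.
try:
    F.pad_bounding_box(boxes, padding=[5, 10], format=features.BoundingBoxFormat.XYXY, padding_mode="reflect")
except ValueError as exc:
    print(exc)  # Padding mode 'reflect' is not supported with bounding boxes
```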
@@ -4,9 +4,7 @@ from typing import Any, List, Optional, Sequence, Tuple, Union
 import torch
 from torchvision._utils import StrEnum
-from torchvision.transforms import InterpolationMode
-from torchvision.transforms.functional import _get_inverse_affine_matrix
-from torchvision.transforms.functional_tensor import _compute_output_size
+from torchvision.transforms import InterpolationMode  # TODO: this needs to be moved out of transforms
 
 from ._feature import _Feature
@@ -120,20 +118,14 @@ class BoundingBox(_Feature):
         fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None,
         padding_mode: str = "constant",
     ) -> BoundingBox:
-        if padding_mode not in ["constant"]:
-            raise ValueError(f"Padding mode '{padding_mode}' is not supported with bounding boxes")
-
         # This cast does Sequence[int] -> List[int] and is required to make mypy happy
         if not isinstance(padding, int):
             padding = list(padding)
 
-        output = self._F.pad_bounding_box(self, padding, format=self.format)
+        output = self._F.pad_bounding_box(self, padding, format=self.format, padding_mode=padding_mode)
 
         # Update output image size:
-        # TODO: remove the import below and make _parse_pad_padding available
-        from torchvision.transforms.functional_tensor import _parse_pad_padding
-
-        left, top, right, bottom = _parse_pad_padding(padding)
+        left, top, right, bottom = self._F._geometry._parse_pad_padding(padding)
         height, width = self.image_size
         height += top + bottom
         width += left + right
@@ -155,11 +147,13 @@ class BoundingBox(_Feature):
         if expand:
             # The way we recompute image_size is not optimal due to redundant computations of
             # - rotation matrix (_get_inverse_affine_matrix)
-            # - points dot matrix (_compute_output_size)
+            # - points dot matrix (_compute_affine_output_size)
             # Alternatively, we could return new image size by self._F.rotate_bounding_box
             height, width = image_size
-            rotation_matrix = _get_inverse_affine_matrix([0.0, 0.0], angle, [0.0, 0.0], 1.0, [0.0, 0.0])
-            new_width, new_height = _compute_output_size(rotation_matrix, width, height)
+            rotation_matrix = self._F._geometry._get_inverse_affine_matrix(
+                [0.0, 0.0], angle, [0.0, 0.0], 1.0, [0.0, 0.0]
+            )
+            new_width, new_height = self._F._geometry._FT._compute_affine_output_size(rotation_matrix, width, height)
             image_size = (new_height, new_width)
 
         return BoundingBox.new_like(self, output, dtype=output.dtype, image_size=image_size)
......
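
Note (editor): the comment in the hunk above flags that recomputing `image_size` for `expand=True` duplicates work already done elsewhere. The quantity being recomputed is just the size of the smallest canvas that contains the rotated image. A simplified sketch of that computation for a pure rotation about the center; it mirrors the corner-point idea behind `_compute_affine_output_size` but is not the library code, and the function name below is made up:

```python
import math
from typing import Tuple


def rotated_canvas_size(height: int, width: int, angle_deg: float) -> Tuple[int, int]:
    # Project the image extents onto the rotated axes and take the bounding box.
    theta = math.radians(angle_deg)
    cos, sin = abs(math.cos(theta)), abs(math.sin(theta))
    new_width = int(math.ceil(width * cos + height * sin))
    new_height = int(math.ceil(width * sin + height * cos))
    return new_height, new_width


print(rotated_canvas_size(480, 640, 45.0))  # (792, 792)
```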
@@ -7,7 +7,7 @@ import torch
 from torchvision.prototype import features
 from torchvision.transforms import functional_pil as _FP, functional_tensor as _FT
 from torchvision.transforms.functional import (
-    _compute_output_size,
+    _compute_resized_output_size,
     _get_inverse_affine_matrix,
     _get_perspective_coeffs,
     InterpolationMode,
@@ -104,7 +104,7 @@ def resize_image_tensor(
     antialias: bool = False,
 ) -> torch.Tensor:
     num_channels, old_height, old_width = get_dimensions_image_tensor(image)
-    new_height, new_width = _compute_output_size((old_height, old_width), size=size, max_size=max_size)
+    new_height, new_width = _compute_resized_output_size((old_height, old_width), size=size, max_size=max_size)
     batch_shape = image.shape[:-3]
     return _FT.resize(
         image.reshape((-1, num_channels, old_height, old_width)),
@@ -124,7 +124,7 @@ def resize_image_pil(
         size = [size, size]
     # Explicitly cast size to list otherwise mypy issue: incompatible type "Sequence[int]"; expected "List[int]"
     size: List[int] = list(size)
-    size = _compute_output_size(img.size[::-1], size=size, max_size=max_size)
+    size = _compute_resized_output_size(img.size[::-1], size=size, max_size=max_size)
     return _FP.resize(img, size, interpolation=pil_modes_mapping[interpolation])
@@ -138,7 +138,7 @@ def resize_bounding_box(
     bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int], max_size: Optional[int] = None
 ) -> torch.Tensor:
     old_height, old_width = image_size
-    new_height, new_width = _compute_output_size(image_size, size=size, max_size=max_size)
+    new_height, new_width = _compute_resized_output_size(image_size, size=size, max_size=max_size)
     ratios = torch.tensor((new_width / old_width, new_height / old_height), device=bounding_box.device)
     return bounding_box.view(-1, 2, 2).mul(ratios).view(bounding_box.shape)
@@ -598,8 +598,15 @@ def pad_segmentation_mask(
 def pad_bounding_box(
-    bounding_box: torch.Tensor, padding: Union[int, List[int]], format: features.BoundingBoxFormat
+    bounding_box: torch.Tensor,
+    padding: Union[int, List[int]],
+    format: features.BoundingBoxFormat,
+    padding_mode: str = "constant",
 ) -> torch.Tensor:
+    if padding_mode not in ["constant"]:
+        # TODO: add support of other padding modes
+        raise ValueError(f"Padding mode '{padding_mode}' is not supported with bounding boxes")
+
     left, _, top, _ = _parse_pad_padding(padding)
 
     bounding_box = bounding_box.clone()
......
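
Note (editor): `pad_bounding_box` now parses `padding` with `_parse_pad_padding` before shifting the boxes. A self-contained sketch of that parsing convention, reimplemented here for illustration; the real helper lives in `torchvision.transforms.functional_tensor` and may differ in details:

```python
from typing import List, Union


def parse_pad_padding(padding: Union[int, List[int]]) -> List[int]:
    # torchvision padding order is (left, top, right, bottom); the parsed result
    # is returned as [left, right, top, bottom], matching torch.nn.functional.pad.
    if isinstance(padding, int):
        padding = [padding]
    if len(padding) == 1:
        left = right = top = bottom = padding[0]
    elif len(padding) == 2:
        left = right = padding[0]
        top = bottom = padding[1]
    elif len(padding) == 4:
        left, top, right, bottom = padding
    else:
        raise ValueError("padding must have 1, 2, or 4 elements")
    return [left, right, top, bottom]


print(parse_pad_padding(3))             # [3, 3, 3, 3]
print(parse_pad_padding([2, 5]))        # [2, 2, 5, 5]
print(parse_pad_padding([1, 2, 3, 4]))  # [1, 3, 2, 4]
```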
@@ -360,7 +360,9 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool
     return F_t.normalize(tensor, mean=mean, std=std, inplace=inplace)
 
 
-def _compute_output_size(image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None) -> List[int]:
+def _compute_resized_output_size(
+    image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
+) -> List[int]:
     if len(size) == 1:  # specified size only for the smallest edge
         h, w = image_size
         short, long = (w, h) if w <= h else (h, w)
@@ -460,7 +462,7 @@ def resize(
     _, image_height, image_width = get_dimensions(img)
     if isinstance(size, int):
         size = [size]
-    output_size = _compute_output_size((image_height, image_width), size, max_size)
+    output_size = _compute_resized_output_size((image_height, image_width), size, max_size)
 
     if (image_height, image_width) == output_size:
         return img
......
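
Note (editor): the renamed `_compute_resized_output_size` implements the usual smallest-edge resize rule with an optional `max_size` cap. A simplified, standalone version of that rule; it is not the exact library code, which also validates the `size`/`max_size` combination:

```python
from typing import Optional, Tuple


def resized_output_size(image_size: Tuple[int, int], size: int, max_size: Optional[int] = None) -> Tuple[int, int]:
    # Scale so that the shorter edge becomes `size`, optionally capping the longer edge.
    h, w = image_size
    short, long = (w, h) if w <= h else (h, w)
    new_short, new_long = size, int(size * long / short)
    if max_size is not None and new_long > max_size:
        new_short, new_long = int(max_size * new_short / new_long), max_size
    new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
    return new_h, new_w


print(resized_output_size((480, 640), 256))                # (256, 341)
print(resized_output_size((480, 640), 256, max_size=320))  # (240, 320)
```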
@@ -610,7 +610,7 @@ def affine(
     return _apply_grid_transform(img, grid, interpolation, fill=fill)
 
 
-def _compute_output_size(matrix: List[float], w: int, h: int) -> Tuple[int, int]:
+def _compute_affine_output_size(matrix: List[float], w: int, h: int) -> Tuple[int, int]:
 
     # Inspired of PIL implementation:
     # https://github.com/python-pillow/Pillow/blob/11de3318867e4398057373ee9f12dcb33db7335c/src/PIL/Image.py#L2054
@@ -652,7 +652,7 @@ def rotate(
 ) -> Tensor:
     _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"])
     w, h = img.shape[-1], img.shape[-2]
-    ow, oh = _compute_output_size(matrix, w, h) if expand else (w, h)
+    ow, oh = _compute_affine_output_size(matrix, w, h) if expand else (w, h)
     dtype = img.dtype if torch.is_floating_point(img) else torch.float32
     theta = torch.tensor(matrix, dtype=dtype, device=img.device).reshape(1, 2, 3)
     # grid will be generated on the same device as theta and img
......
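
Note (editor): a quick end-to-end check of the rename through the public API; `rotate(..., expand=True)` sizes its output via this helper. The input size and angle below are illustrative:

```python
import torch
import torchvision.transforms.functional as TF

img = torch.zeros(3, 480, 640)
out = TF.rotate(img, angle=45.0, expand=True)
print(out.shape)  # expected around torch.Size([3, 792, 792]) for a 480x640 input
```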