Unverified Commit 26099237 authored by vfdev, committed by GitHub

[proto] Clean-ups from 6512 (#6520)



* clean up bboxes

* Adding padding_mode in pad_bbox

* Fixing tests and renaming _compute_output_size to appropriate names

Co-authored-by: Vasilis Vryniotis <vvryniotis@fb.com>
parent 4cb83c2f
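
Note (editor): the functional change below is a new `padding_mode` argument on `pad_bounding_box`, which only accepts `"constant"` for now. A rough usage sketch, assuming the prototype namespace at this commit is importable; the import paths and box values are illustrative, not taken from the PR:

```python
import torch
from torchvision.prototype import features
from torchvision.prototype.transforms import functional as F  # assumed location of pad_bounding_box

# One box in XYXY format; values are made up for illustration.
boxes = torch.tensor([[10.0, 20.0, 30.0, 40.0]])

# "constant" padding shifts the box by the left/top padding amounts.
padded = F.pad_bounding_box(boxes, padding=[5, 10], format=features.BoundingBoxFormat.XYXY, padding_mode="constant")

# Any other mode is rejected with the error message added in this PR.
try:
    F.pad_bounding_box(boxes, padding=[5, 10], format=features.BoundingBoxFormat.XYXY, padding_mode="reflect")
except ValueError as exc:
    print(exc)  # Padding mode 'reflect' is not supported with bounding boxes
```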
@@ -4,9 +4,7 @@ from typing import Any, List, Optional, Sequence, Tuple, Union
 import torch
 from torchvision._utils import StrEnum
-from torchvision.transforms import InterpolationMode
-from torchvision.transforms.functional import _get_inverse_affine_matrix
-from torchvision.transforms.functional_tensor import _compute_output_size
+from torchvision.transforms import InterpolationMode  # TODO: this needs to be moved out of transforms
 
 from ._feature import _Feature
@@ -120,20 +118,14 @@ class BoundingBox(_Feature):
         fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None,
         padding_mode: str = "constant",
     ) -> BoundingBox:
-        if padding_mode not in ["constant"]:
-            raise ValueError(f"Padding mode '{padding_mode}' is not supported with bounding boxes")
-
         # This cast does Sequence[int] -> List[int] and is required to make mypy happy
         if not isinstance(padding, int):
             padding = list(padding)
 
-        output = self._F.pad_bounding_box(self, padding, format=self.format)
+        output = self._F.pad_bounding_box(self, padding, format=self.format, padding_mode=padding_mode)
 
         # Update output image size:
-        # TODO: remove the import below and make _parse_pad_padding available
-        from torchvision.transforms.functional_tensor import _parse_pad_padding
-
-        left, top, right, bottom = _parse_pad_padding(padding)
+        left, top, right, bottom = self._F._geometry._parse_pad_padding(padding)
         height, width = self.image_size
         height += top + bottom
         width += left + right
@@ -155,11 +147,13 @@ class BoundingBox(_Feature):
         if expand:
             # The way we recompute image_size is not optimal due to redundant computations of
             # - rotation matrix (_get_inverse_affine_matrix)
-            # - points dot matrix (_compute_output_size)
+            # - points dot matrix (_compute_affine_output_size)
             # Alternatively, we could return new image size by self._F.rotate_bounding_box
             height, width = image_size
-            rotation_matrix = _get_inverse_affine_matrix([0.0, 0.0], angle, [0.0, 0.0], 1.0, [0.0, 0.0])
-            new_width, new_height = _compute_output_size(rotation_matrix, width, height)
+            rotation_matrix = self._F._geometry._get_inverse_affine_matrix(
+                [0.0, 0.0], angle, [0.0, 0.0], 1.0, [0.0, 0.0]
+            )
+            new_width, new_height = self._F._geometry._FT._compute_affine_output_size(rotation_matrix, width, height)
             image_size = (new_height, new_width)
 
         return BoundingBox.new_like(self, output, dtype=output.dtype, image_size=image_size)
......
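
Note (editor): the comment in the hunk above flags that recomputing `image_size` for `expand=True` duplicates work already done elsewhere. The quantity being recomputed is just the size of the smallest canvas that contains the rotated image. A simplified sketch of that computation for a pure rotation about the center; it mirrors the corner-point idea behind `_compute_affine_output_size` but is not the library code, and the function name below is made up:

```python
import math
from typing import Tuple


def rotated_canvas_size(height: int, width: int, angle_deg: float) -> Tuple[int, int]:
    # Project the image extents onto the rotated axes and take the bounding box.
    theta = math.radians(angle_deg)
    cos, sin = abs(math.cos(theta)), abs(math.sin(theta))
    new_width = int(math.ceil(width * cos + height * sin))
    new_height = int(math.ceil(width * sin + height * cos))
    return new_height, new_width


print(rotated_canvas_size(480, 640, 45.0))  # (792, 792)
```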
@@ -7,7 +7,7 @@ import torch
 from torchvision.prototype import features
 from torchvision.transforms import functional_pil as _FP, functional_tensor as _FT
 from torchvision.transforms.functional import (
-    _compute_output_size,
+    _compute_resized_output_size,
     _get_inverse_affine_matrix,
     _get_perspective_coeffs,
     InterpolationMode,
@@ -104,7 +104,7 @@ def resize_image_tensor(
     antialias: bool = False,
 ) -> torch.Tensor:
     num_channels, old_height, old_width = get_dimensions_image_tensor(image)
-    new_height, new_width = _compute_output_size((old_height, old_width), size=size, max_size=max_size)
+    new_height, new_width = _compute_resized_output_size((old_height, old_width), size=size, max_size=max_size)
     batch_shape = image.shape[:-3]
     return _FT.resize(
         image.reshape((-1, num_channels, old_height, old_width)),
@@ -124,7 +124,7 @@ def resize_image_pil(
         size = [size, size]
     # Explicitly cast size to list otherwise mypy issue: incompatible type "Sequence[int]"; expected "List[int]"
     size: List[int] = list(size)
-    size = _compute_output_size(img.size[::-1], size=size, max_size=max_size)
+    size = _compute_resized_output_size(img.size[::-1], size=size, max_size=max_size)
     return _FP.resize(img, size, interpolation=pil_modes_mapping[interpolation])
@@ -138,7 +138,7 @@ def resize_bounding_box(
     bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int], max_size: Optional[int] = None
 ) -> torch.Tensor:
     old_height, old_width = image_size
-    new_height, new_width = _compute_output_size(image_size, size=size, max_size=max_size)
+    new_height, new_width = _compute_resized_output_size(image_size, size=size, max_size=max_size)
     ratios = torch.tensor((new_width / old_width, new_height / old_height), device=bounding_box.device)
     return bounding_box.view(-1, 2, 2).mul(ratios).view(bounding_box.shape)
@@ -598,8 +598,15 @@ def pad_segmentation_mask(
 def pad_bounding_box(
-    bounding_box: torch.Tensor, padding: Union[int, List[int]], format: features.BoundingBoxFormat
+    bounding_box: torch.Tensor,
+    padding: Union[int, List[int]],
+    format: features.BoundingBoxFormat,
+    padding_mode: str = "constant",
 ) -> torch.Tensor:
+    if padding_mode not in ["constant"]:
+        # TODO: add support of other padding modes
+        raise ValueError(f"Padding mode '{padding_mode}' is not supported with bounding boxes")
+
     left, _, top, _ = _parse_pad_padding(padding)
 
     bounding_box = bounding_box.clone()
......
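
Note (editor): `pad_bounding_box` now parses `padding` with `_parse_pad_padding` before shifting the boxes. A self-contained sketch of that parsing convention, reimplemented here for illustration; the real helper lives in `torchvision.transforms.functional_tensor` and may differ in details:

```python
from typing import List, Union


def parse_pad_padding(padding: Union[int, List[int]]) -> List[int]:
    # torchvision padding order is (left, top, right, bottom); the parsed result
    # is returned as [left, right, top, bottom], matching torch.nn.functional.pad.
    if isinstance(padding, int):
        padding = [padding]
    if len(padding) == 1:
        left = right = top = bottom = padding[0]
    elif len(padding) == 2:
        left = right = padding[0]
        top = bottom = padding[1]
    elif len(padding) == 4:
        left, top, right, bottom = padding
    else:
        raise ValueError("padding must have 1, 2, or 4 elements")
    return [left, right, top, bottom]


print(parse_pad_padding(3))             # [3, 3, 3, 3]
print(parse_pad_padding([2, 5]))        # [2, 2, 5, 5]
print(parse_pad_padding([1, 2, 3, 4]))  # [1, 3, 2, 4]
```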
@@ -360,7 +360,9 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool
     return F_t.normalize(tensor, mean=mean, std=std, inplace=inplace)
 
 
-def _compute_output_size(image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None) -> List[int]:
+def _compute_resized_output_size(
+    image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
+) -> List[int]:
     if len(size) == 1:  # specified size only for the smallest edge
         h, w = image_size
         short, long = (w, h) if w <= h else (h, w)
@@ -460,7 +462,7 @@ def resize(
     _, image_height, image_width = get_dimensions(img)
     if isinstance(size, int):
         size = [size]
-    output_size = _compute_output_size((image_height, image_width), size, max_size)
+    output_size = _compute_resized_output_size((image_height, image_width), size, max_size)
 
     if (image_height, image_width) == output_size:
         return img
......
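
Note (editor): the renamed `_compute_resized_output_size` implements the usual smallest-edge resize rule with an optional `max_size` cap. A simplified, standalone version of that rule; it is not the exact library code, which also validates the `size`/`max_size` combination:

```python
from typing import Optional, Tuple


def resized_output_size(image_size: Tuple[int, int], size: int, max_size: Optional[int] = None) -> Tuple[int, int]:
    # Scale so that the shorter edge becomes `size`, optionally capping the longer edge.
    h, w = image_size
    short, long = (w, h) if w <= h else (h, w)
    new_short, new_long = size, int(size * long / short)
    if max_size is not None and new_long > max_size:
        new_short, new_long = int(max_size * new_short / new_long), max_size
    new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
    return new_h, new_w


print(resized_output_size((480, 640), 256))                # (256, 341)
print(resized_output_size((480, 640), 256, max_size=320))  # (240, 320)
```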
@@ -610,7 +610,7 @@ def affine(
     return _apply_grid_transform(img, grid, interpolation, fill=fill)
 
 
-def _compute_output_size(matrix: List[float], w: int, h: int) -> Tuple[int, int]:
+def _compute_affine_output_size(matrix: List[float], w: int, h: int) -> Tuple[int, int]:
 
     # Inspired of PIL implementation:
     # https://github.com/python-pillow/Pillow/blob/11de3318867e4398057373ee9f12dcb33db7335c/src/PIL/Image.py#L2054
@@ -652,7 +652,7 @@ def rotate(
 ) -> Tensor:
     _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"])
     w, h = img.shape[-1], img.shape[-2]
-    ow, oh = _compute_output_size(matrix, w, h) if expand else (w, h)
+    ow, oh = _compute_affine_output_size(matrix, w, h) if expand else (w, h)
     dtype = img.dtype if torch.is_floating_point(img) else torch.float32
     theta = torch.tensor(matrix, dtype=dtype, device=img.device).reshape(1, 2, 3)
     # grid will be generated on the same device as theta and img
......
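
Note (editor): a quick end-to-end check of the rename through the public API; `rotate(..., expand=True)` sizes its output via this helper. The input size and angle below are illustrative:

```python
import torch
import torchvision.transforms.functional as TF

img = torch.zeros(3, 480, 640)
out = TF.rotate(img, angle=45.0, expand=True)
print(out.shape)  # expected around torch.Size([3, 792, 792]) for a 480x640 input
```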