Unverified Commit b8af91ab authored by Vasilis Vryniotis's avatar Vasilis Vryniotis Committed by GitHub
Browse files

[prototype] Fix BC-breakages on input params of `F` (#6636)



* Fix `size` in resize.

* Update torchvision/prototype/features/_bounding_box.py
Co-authored-by: default avatarPhilip Meier <github.pmeier@posteo.de>

* Address some of the comments.

* Fix `output_size` in center_crop.

* Fix `CenterCrop` transform

* Fix `size` in five_crop.

* Fix `size` in ten_crop.

* Fix `kernel_size` and `sigma` in gaussian_blur.

* Fix `angle` and `shear` in affine.

* Fixing JIT-scriptability issues.

* Update TODOs.

* Restore fake types for `Union[int, List[int]]` and `Union[int, float, List[float]]`

* Fixing tests

* Fix linter

* revert unnecessary JIT mitigations.

* Cherrypick Philip's 6dfc9657ce89fe9e018a11ee25a8e26c7d3d43c6

* Linter fix

* Adding center float casting
Co-authored-by: default avatarPhilip Meier <github.pmeier@posteo.de>
parent c2ca691d
...@@ -799,7 +799,9 @@ class TestGaussianBlur: ...@@ -799,7 +799,9 @@ class TestGaussianBlur:
with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"): with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"):
transforms.GaussianBlur(4) transforms.GaussianBlur(4)
with pytest.raises(TypeError, match="sigma should be a single float or a list/tuple with length 2"): with pytest.raises(
TypeError, match="sigma should be a single int or float or a list/tuple with length 2 floats."
):
transforms.GaussianBlur(3, sigma=[1, 2, 3]) transforms.GaussianBlur(3, sigma=[1, 2, 3])
with pytest.raises(ValueError, match="If sigma is a single number, it must be positive"): with pytest.raises(ValueError, match="If sigma is a single number, it must be positive"):
...@@ -833,7 +835,7 @@ class TestGaussianBlur: ...@@ -833,7 +835,7 @@ class TestGaussianBlur:
if isinstance(sigma, (tuple, list)): if isinstance(sigma, (tuple, list)):
assert transform.sigma == sigma assert transform.sigma == sigma
else: else:
assert transform.sigma == (sigma, sigma) assert transform.sigma == [sigma, sigma]
fn = mocker.patch("torchvision.prototype.transforms.functional.gaussian_blur") fn = mocker.patch("torchvision.prototype.transforms.functional.gaussian_blur")
inpt = mocker.MagicMock(spec=features.Image) inpt = mocker.MagicMock(spec=features.Image)
......
...@@ -84,6 +84,8 @@ class BoundingBox(_Feature): ...@@ -84,6 +84,8 @@ class BoundingBox(_Feature):
antialias: bool = False, antialias: bool = False,
) -> BoundingBox: ) -> BoundingBox:
output = self._F.resize_bounding_box(self, size, image_size=self.image_size, max_size=max_size) output = self._F.resize_bounding_box(self, size, image_size=self.image_size, max_size=max_size)
if isinstance(size, int):
size = [size]
image_size = (size[0], size[0]) if len(size) == 1 else (size[0], size[1]) image_size = (size[0], size[0]) if len(size) == 1 else (size[0], size[1])
return BoundingBox.new_like(self, output, image_size=image_size, dtype=output.dtype) return BoundingBox.new_like(self, output, image_size=image_size, dtype=output.dtype)
...@@ -95,6 +97,8 @@ class BoundingBox(_Feature): ...@@ -95,6 +97,8 @@ class BoundingBox(_Feature):
output = self._F.center_crop_bounding_box( output = self._F.center_crop_bounding_box(
self, format=self.format, output_size=output_size, image_size=self.image_size self, format=self.format, output_size=output_size, image_size=self.image_size
) )
if isinstance(output_size, int):
output_size = [output_size]
image_size = (output_size[0], output_size[0]) if len(output_size) == 1 else (output_size[0], output_size[1]) image_size = (output_size[0], output_size[0]) if len(output_size) == 1 else (output_size[0], output_size[1])
return BoundingBox.new_like(self, output, image_size=image_size) return BoundingBox.new_like(self, output, image_size=image_size)
...@@ -160,7 +164,7 @@ class BoundingBox(_Feature): ...@@ -160,7 +164,7 @@ class BoundingBox(_Feature):
def affine( def affine(
self, self,
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
......
...@@ -169,7 +169,7 @@ class _Feature(torch.Tensor): ...@@ -169,7 +169,7 @@ class _Feature(torch.Tensor):
def affine( def affine(
self, self,
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
......
...@@ -198,7 +198,7 @@ class Image(_Feature): ...@@ -198,7 +198,7 @@ class Image(_Feature):
def affine( def affine(
self, self,
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
......
...@@ -70,7 +70,7 @@ class Mask(_Feature): ...@@ -70,7 +70,7 @@ class Mask(_Feature):
def affine( def affine(
self, self,
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
......
...@@ -19,6 +19,7 @@ from ._utils import ( ...@@ -19,6 +19,7 @@ from ._utils import (
_check_sequence_input, _check_sequence_input,
_setup_angle, _setup_angle,
_setup_fill_arg, _setup_fill_arg,
_setup_float_or_seq,
_setup_size, _setup_size,
has_all, has_all,
has_any, has_any,
...@@ -67,9 +68,9 @@ class Resize(Transform): ...@@ -67,9 +68,9 @@ class Resize(Transform):
class CenterCrop(Transform): class CenterCrop(Transform):
def __init__(self, size: List[int]): def __init__(self, size: Union[int, Sequence[int]]):
super().__init__() super().__init__()
self.size = size self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
return F.center_crop(inpt, output_size=self.size) return F.center_crop(inpt, output_size=self.size)
...@@ -320,7 +321,7 @@ class RandomAffine(Transform): ...@@ -320,7 +321,7 @@ class RandomAffine(Transform):
degrees: Union[numbers.Number, Sequence], degrees: Union[numbers.Number, Sequence],
translate: Optional[Sequence[float]] = None, translate: Optional[Sequence[float]] = None,
scale: Optional[Sequence[float]] = None, scale: Optional[Sequence[float]] = None,
shear: Optional[Union[float, Sequence[float]]] = None, shear: Optional[Union[int, float, Sequence[float]]] = None,
interpolation: InterpolationMode = InterpolationMode.NEAREST, interpolation: InterpolationMode = InterpolationMode.NEAREST,
fill: Union[features.FillType, Dict[Type, features.FillType]] = 0, fill: Union[features.FillType, Dict[Type, features.FillType]] = 0,
center: Optional[List[float]] = None, center: Optional[List[float]] = None,
...@@ -545,23 +546,6 @@ class RandomPerspective(_RandomApplyTransform): ...@@ -545,23 +546,6 @@ class RandomPerspective(_RandomApplyTransform):
) )
def _setup_float_or_seq(arg: Union[float, Sequence[float]], name: str, req_size: int = 2) -> Sequence[float]:
    """Validate and normalize *arg* into a sequence of floats.

    A scalar float is expanded to ``[arg, arg]``; a sequence must contain
    exactly ``req_size`` float elements and is returned unchanged.

    Args:
        arg: a single float or a sequence of floats.
        name: parameter name used in error messages.
        req_size: required length for a sequence input.

    Raises:
        TypeError: if ``arg`` is neither a float nor a sequence.
        ValueError: if a sequence has the wrong length or non-float elements.
    """
    if not isinstance(arg, (float, Sequence)):
        raise TypeError(f"{name} should be float or a sequence of floats. Got {type(arg)}")
    if isinstance(arg, Sequence) and len(arg) != req_size:
        raise ValueError(f"If {name} is a sequence its length should be one of {req_size}. Got {len(arg)}")
    if isinstance(arg, Sequence):
        # Strict element check: ints are rejected, only true floats pass.
        for element in arg:
            if not isinstance(element, float):
                raise ValueError(f"{name} should be a sequence of floats. Got {type(element)}")
    if isinstance(arg, float):
        # Duplicate a scalar into a pair.
        arg = [float(arg), float(arg)]
    # NOTE(review): with the default req_size=2 a length-1 sequence already
    # failed the length check above, so this branch fires only for req_size == 1.
    if isinstance(arg, (list, tuple)) and len(arg) == 1:
        arg = [arg[0], arg[0]]
    return arg
class ElasticTransform(Transform): class ElasticTransform(Transform):
def __init__( def __init__(
self, self,
......
...@@ -8,7 +8,7 @@ from torchvision.ops import remove_small_boxes ...@@ -8,7 +8,7 @@ from torchvision.ops import remove_small_boxes
from torchvision.prototype import features from torchvision.prototype import features
from torchvision.prototype.transforms import functional as F, Transform from torchvision.prototype.transforms import functional as F, Transform
from ._utils import _setup_size, has_any, query_bounding_box from ._utils import _setup_float_or_seq, _setup_size, has_any, query_bounding_box
class Identity(Transform): class Identity(Transform):
...@@ -112,7 +112,7 @@ class Normalize(Transform): ...@@ -112,7 +112,7 @@ class Normalize(Transform):
class GaussianBlur(Transform): class GaussianBlur(Transform):
def __init__( def __init__(
self, kernel_size: Union[int, Sequence[int]], sigma: Union[float, Sequence[float]] = (0.1, 2.0) self, kernel_size: Union[int, Sequence[int]], sigma: Union[int, float, Sequence[float]] = (0.1, 2.0)
) -> None: ) -> None:
super().__init__() super().__init__()
self.kernel_size = _setup_size(kernel_size, "Kernel size should be a tuple/list of two integers") self.kernel_size = _setup_size(kernel_size, "Kernel size should be a tuple/list of two integers")
...@@ -120,17 +120,17 @@ class GaussianBlur(Transform): ...@@ -120,17 +120,17 @@ class GaussianBlur(Transform):
if ks <= 0 or ks % 2 == 0: if ks <= 0 or ks % 2 == 0:
raise ValueError("Kernel size value should be an odd and positive number.") raise ValueError("Kernel size value should be an odd and positive number.")
if isinstance(sigma, float): if isinstance(sigma, (int, float)):
if sigma <= 0: if sigma <= 0:
raise ValueError("If sigma is a single number, it must be positive.") raise ValueError("If sigma is a single number, it must be positive.")
sigma = (sigma, sigma) sigma = float(sigma)
elif isinstance(sigma, Sequence) and len(sigma) == 2: elif isinstance(sigma, Sequence) and len(sigma) == 2:
if not 0.0 < sigma[0] <= sigma[1]: if not 0.0 < sigma[0] <= sigma[1]:
raise ValueError("sigma values should be positive and of the form (min, max).") raise ValueError("sigma values should be positive and of the form (min, max).")
else: else:
raise TypeError("sigma should be a single float or a list/tuple with length 2 floats.") raise TypeError("sigma should be a single int or float or a list/tuple with length 2 floats.")
self.sigma = sigma self.sigma = _setup_float_or_seq(sigma, "sigma", 2)
def _get_params(self, sample: Any) -> Dict[str, Any]: def _get_params(self, sample: Any) -> Dict[str, Any]:
sigma = torch.empty(1).uniform_(self.sigma[0], self.sigma[1]).item() sigma = torch.empty(1).uniform_(self.sigma[0], self.sigma[1]).item()
......
...@@ -16,6 +16,23 @@ from torchvision.transforms.transforms import _check_sequence_input, _setup_angl ...@@ -16,6 +16,23 @@ from torchvision.transforms.transforms import _check_sequence_input, _setup_angl
from typing_extensions import Literal from typing_extensions import Literal
def _setup_float_or_seq(arg: Union[float, Sequence[float]], name: str, req_size: int = 2) -> Sequence[float]:
if not isinstance(arg, (float, Sequence)):
raise TypeError(f"{name} should be float or a sequence of floats. Got {type(arg)}")
if isinstance(arg, Sequence) and len(arg) != req_size:
raise ValueError(f"If {name} is a sequence its length should be one of {req_size}. Got {len(arg)}")
if isinstance(arg, Sequence):
for element in arg:
if not isinstance(element, float):
raise ValueError(f"{name} should be a sequence of floats. Got {type(element)}")
if isinstance(arg, float):
arg = [float(arg), float(arg)]
if isinstance(arg, (list, tuple)) and len(arg) == 1:
arg = [arg[0], arg[0]]
return arg
def _check_fill_arg(fill: Union[FillType, Dict[Type, FillType]]) -> None: def _check_fill_arg(fill: Union[FillType, Dict[Type, FillType]]) -> None:
if isinstance(fill, dict): if isinstance(fill, dict):
for key, value in fill.items(): for key, value in fill.items():
......
...@@ -97,6 +97,8 @@ def resize_image_tensor( ...@@ -97,6 +97,8 @@ def resize_image_tensor(
max_size: Optional[int] = None, max_size: Optional[int] = None,
antialias: bool = False, antialias: bool = False,
) -> torch.Tensor: ) -> torch.Tensor:
if isinstance(size, int):
size = [size]
num_channels, old_height, old_width = get_dimensions_image_tensor(image) num_channels, old_height, old_width = get_dimensions_image_tensor(image)
new_height, new_width = _compute_resized_output_size((old_height, old_width), size=size, max_size=max_size) new_height, new_width = _compute_resized_output_size((old_height, old_width), size=size, max_size=max_size)
extra_dims = image.shape[:-3] extra_dims = image.shape[:-3]
...@@ -145,6 +147,8 @@ def resize_mask(mask: torch.Tensor, size: List[int], max_size: Optional[int] = N ...@@ -145,6 +147,8 @@ def resize_mask(mask: torch.Tensor, size: List[int], max_size: Optional[int] = N
def resize_bounding_box( def resize_bounding_box(
bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int], max_size: Optional[int] = None bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int], max_size: Optional[int] = None
) -> torch.Tensor: ) -> torch.Tensor:
if isinstance(size, int):
size = [size]
old_height, old_width = image_size old_height, old_width = image_size
new_height, new_width = _compute_resized_output_size(image_size, size=size, max_size=max_size) new_height, new_width = _compute_resized_output_size(image_size, size=size, max_size=max_size)
ratios = torch.tensor((new_width / old_width, new_height / old_height), device=bounding_box.device) ratios = torch.tensor((new_width / old_width, new_height / old_height), device=bounding_box.device)
...@@ -171,7 +175,7 @@ def resize( ...@@ -171,7 +175,7 @@ def resize(
def _affine_parse_args( def _affine_parse_args(
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
...@@ -214,15 +218,18 @@ def _affine_parse_args( ...@@ -214,15 +218,18 @@ def _affine_parse_args(
if len(shear) != 2: if len(shear) != 2:
raise ValueError(f"Shear should be a sequence containing two values. Got {shear}") raise ValueError(f"Shear should be a sequence containing two values. Got {shear}")
if center is not None and not isinstance(center, (list, tuple)): if center is not None:
raise TypeError("Argument center should be a sequence") if not isinstance(center, (list, tuple)):
raise TypeError("Argument center should be a sequence")
else:
center = [float(c) for c in center]
return angle, translate, shear, center return angle, translate, shear, center
def affine_image_tensor( def affine_image_tensor(
img: torch.Tensor, img: torch.Tensor,
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
...@@ -254,7 +261,7 @@ def affine_image_tensor( ...@@ -254,7 +261,7 @@ def affine_image_tensor(
@torch.jit.unused @torch.jit.unused
def affine_image_pil( def affine_image_pil(
img: PIL.Image.Image, img: PIL.Image.Image,
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
...@@ -278,34 +285,26 @@ def affine_image_pil( ...@@ -278,34 +285,26 @@ def affine_image_pil(
def _affine_bounding_box_xyxy( def _affine_bounding_box_xyxy(
bounding_box: torch.Tensor, bounding_box: torch.Tensor,
image_size: Tuple[int, int], image_size: Tuple[int, int],
angle: float, angle: Union[int, float],
translate: Optional[List[float]] = None, translate: List[float],
scale: Optional[float] = None, scale: float,
shear: Optional[List[float]] = None, shear: List[float],
center: Optional[List[float]] = None, center: Optional[List[float]] = None,
expand: bool = False, expand: bool = False,
) -> torch.Tensor: ) -> torch.Tensor:
dtype = bounding_box.dtype if torch.is_floating_point(bounding_box) else torch.float32 angle, translate, shear, center = _affine_parse_args(
device = bounding_box.device angle, translate, scale, shear, InterpolationMode.NEAREST, center
)
if translate is None:
translate = [0.0, 0.0]
if scale is None:
scale = 1.0
if shear is None:
shear = [0.0, 0.0]
if center is None: if center is None:
height, width = image_size height, width = image_size
center_f = [width * 0.5, height * 0.5] center = [width * 0.5, height * 0.5]
else:
center_f = [float(c) for c in center] dtype = bounding_box.dtype if torch.is_floating_point(bounding_box) else torch.float32
device = bounding_box.device
translate_f = [float(t) for t in translate]
affine_matrix = torch.tensor( affine_matrix = torch.tensor(
_get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear, inverted=False), _get_inverse_affine_matrix(center, angle, translate, scale, shear, inverted=False),
dtype=dtype, dtype=dtype,
device=device, device=device,
).view(2, 3) ).view(2, 3)
...@@ -351,7 +350,7 @@ def affine_bounding_box( ...@@ -351,7 +350,7 @@ def affine_bounding_box(
bounding_box: torch.Tensor, bounding_box: torch.Tensor,
format: features.BoundingBoxFormat, format: features.BoundingBoxFormat,
image_size: Tuple[int, int], image_size: Tuple[int, int],
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
...@@ -373,7 +372,7 @@ def affine_bounding_box( ...@@ -373,7 +372,7 @@ def affine_bounding_box(
def affine_mask( def affine_mask(
mask: torch.Tensor, mask: torch.Tensor,
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
...@@ -419,7 +418,7 @@ def _convert_fill_arg(fill: features.FillType) -> features.FillTypeJIT: ...@@ -419,7 +418,7 @@ def _convert_fill_arg(fill: features.FillType) -> features.FillTypeJIT:
def affine( def affine(
inpt: features.InputTypeJIT, inpt: features.InputTypeJIT,
angle: float, angle: Union[int, float],
translate: List[float], translate: List[float],
scale: float, scale: float,
shear: List[float], shear: List[float],
...@@ -427,6 +426,7 @@ def affine( ...@@ -427,6 +426,7 @@ def affine(
fill: features.FillTypeJIT = None, fill: features.FillTypeJIT = None,
center: Optional[List[float]] = None, center: Optional[List[float]] = None,
) -> features.InputTypeJIT: ) -> features.InputTypeJIT:
# TODO: consider deprecating integers from angle and shear in the future
if isinstance(inpt, torch.Tensor) and (torch.jit.is_scripting() or not isinstance(inpt, features._Feature)): if isinstance(inpt, torch.Tensor) and (torch.jit.is_scripting() or not isinstance(inpt, features._Feature)):
return affine_image_tensor( return affine_image_tensor(
inpt, inpt,
...@@ -528,7 +528,16 @@ def rotate_bounding_box( ...@@ -528,7 +528,16 @@ def rotate_bounding_box(
bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
).view(-1, 4) ).view(-1, 4)
out_bboxes = _affine_bounding_box_xyxy(bounding_box, image_size, angle=-angle, center=center, expand=expand) out_bboxes = _affine_bounding_box_xyxy(
bounding_box,
image_size,
angle=-angle,
translate=[0.0, 0.0],
scale=1.0,
shear=[0.0, 0.0],
center=center,
expand=expand,
)
return convert_format_bounding_box( return convert_format_bounding_box(
out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
......
...@@ -23,6 +23,7 @@ def normalize( ...@@ -23,6 +23,7 @@ def normalize(
def gaussian_blur_image_tensor( def gaussian_blur_image_tensor(
img: torch.Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None img: torch.Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None
) -> torch.Tensor: ) -> torch.Tensor:
# TODO: consider deprecating integers from sigma in the future
if isinstance(kernel_size, int): if isinstance(kernel_size, int):
kernel_size = [kernel_size, kernel_size] kernel_size = [kernel_size, kernel_size]
if len(kernel_size) != 2: if len(kernel_size) != 2:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment