Unverified commit 7896ffd9, authored by Siddarth Ijju, committed by GitHub
Browse files

Allow v2 Resize to resize longer edge exactly to `max_size` (#8459)


Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
parent 10239873
...@@ -99,7 +99,7 @@ def _script(obj): ...@@ -99,7 +99,7 @@ def _script(obj):
return torch.jit.script(obj) return torch.jit.script(obj)
except Exception as error: except Exception as error:
name = getattr(obj, "__name__", obj.__class__.__name__) name = getattr(obj, "__name__", obj.__class__.__name__)
raise AssertionError(f"Trying to `torch.jit.script` '{name}' raised the error above.") from error raise AssertionError(f"Trying to `torch.jit.script` `{name}` raised the error above.") from error
def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs): def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs):
...@@ -553,10 +553,12 @@ def reference_affine_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new ...@@ -553,10 +553,12 @@ def reference_affine_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new
class TestResize: class TestResize:
INPUT_SIZE = (17, 11) INPUT_SIZE = (17, 11)
OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)] OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)]
def _make_max_size_kwarg(self, *, use_max_size, size): def _make_max_size_kwarg(self, *, use_max_size, size):
if use_max_size: if size is None:
max_size = min(list(self.INPUT_SIZE))
elif use_max_size:
if not (isinstance(size, int) or len(size) == 1): if not (isinstance(size, int) or len(size) == 1):
# This would result in an `ValueError` # This would result in an `ValueError`
return None return None
...@@ -568,10 +570,13 @@ class TestResize: ...@@ -568,10 +570,13 @@ class TestResize:
return dict(max_size=max_size) return dict(max_size=max_size)
def _compute_output_size(self, *, input_size, size, max_size): def _compute_output_size(self, *, input_size, size, max_size):
if not (isinstance(size, int) or len(size) == 1): if size is None:
size = max_size
elif not (isinstance(size, int) or len(size) == 1):
return tuple(size) return tuple(size)
if not isinstance(size, int): elif not isinstance(size, int):
size = size[0] size = size[0]
old_height, old_width = input_size old_height, old_width = input_size
...@@ -658,10 +663,13 @@ class TestResize: ...@@ -658,10 +663,13 @@ class TestResize:
[make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
) )
def test_functional(self, size, make_input): def test_functional(self, size, make_input):
max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size)
check_functional( check_functional(
F.resize, F.resize,
make_input(self.INPUT_SIZE), make_input(self.INPUT_SIZE),
size=size, size=size,
**max_size_kwarg,
antialias=True, antialias=True,
check_scripted_smoke=not isinstance(size, int), check_scripted_smoke=not isinstance(size, int),
) )
...@@ -695,11 +703,13 @@ class TestResize: ...@@ -695,11 +703,13 @@ class TestResize:
], ],
) )
def test_transform(self, size, device, make_input): def test_transform(self, size, device, make_input):
max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size)
check_transform( check_transform(
transforms.Resize(size=size, antialias=True), transforms.Resize(size=size, **max_size_kwarg, antialias=True),
make_input(self.INPUT_SIZE, device=device), make_input(self.INPUT_SIZE, device=device),
# atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes
check_v1_compatibility=dict(rtol=0, atol=1), check_v1_compatibility=dict(rtol=0, atol=1) if size is not None else False,
) )
def _check_output_size(self, input, output, *, size, max_size): def _check_output_size(self, input, output, *, size, max_size):
...@@ -801,7 +811,11 @@ class TestResize: ...@@ -801,7 +811,11 @@ class TestResize:
], ],
) )
def test_max_size_error(self, size, make_input): def test_max_size_error(self, size, make_input):
if isinstance(size, int) or len(size) == 1: if size is None:
# value can be anything other than an integer
max_size = None
match = "max_size must be an integer when size is None"
elif isinstance(size, int) or len(size) == 1:
max_size = (size if isinstance(size, int) else size[0]) - 1 max_size = (size if isinstance(size, int) else size[0]) - 1
match = "must be strictly greater than the requested size" match = "must be strictly greater than the requested size"
else: else:
...@@ -812,6 +826,37 @@ class TestResize: ...@@ -812,6 +826,37 @@ class TestResize:
with pytest.raises(ValueError, match=match): with pytest.raises(ValueError, match=match):
F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True)
if isinstance(size, list) and len(size) != 1:
with pytest.raises(ValueError, match="max_size should only be passed if size is None or specifies"):
F.resize(make_input(self.INPUT_SIZE), size=size, max_size=500)
@pytest.mark.parametrize(
"input_size, max_size, expected_size",
[
((10, 10), 10, (10, 10)),
((10, 20), 40, (20, 40)),
((20, 10), 40, (40, 20)),
((10, 20), 10, (5, 10)),
((20, 10), 10, (10, 5)),
],
)
@pytest.mark.parametrize(
"make_input",
[
make_image_tensor,
make_image_pil,
make_image,
make_bounding_boxes,
make_segmentation_mask,
make_detection_masks,
make_video,
],
)
def test_resize_size_none(self, input_size, max_size, expected_size, make_input):
img = make_input(input_size)
out = F.resize(img, size=None, max_size=max_size)
assert F.get_size(out)[-2:] == list(expected_size)
@pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"make_input", "make_input",
...@@ -834,7 +879,7 @@ class TestResize: ...@@ -834,7 +879,7 @@ class TestResize:
assert_equal(actual, expected) assert_equal(actual, expected)
def test_transform_unknown_size_error(self): def test_transform_unknown_size_error(self):
with pytest.raises(ValueError, match="size can either be an integer or a sequence of one or two integers"): with pytest.raises(ValueError, match="size can be an integer, a sequence of one or two integers, or None"):
transforms.Resize(size=object()) transforms.Resize(size=object())
@pytest.mark.parametrize( @pytest.mark.parametrize(
......
...@@ -351,13 +351,22 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool ...@@ -351,13 +351,22 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool
def _compute_resized_output_size( def _compute_resized_output_size(
image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None image_size: Tuple[int, int],
size: Optional[List[int]],
max_size: Optional[int] = None,
allow_size_none: bool = False, # only True in v2
) -> List[int]: ) -> List[int]:
if len(size) == 1: # specified size only for the smallest edge
h, w = image_size h, w = image_size
short, long = (w, h) if w <= h else (h, w) short, long = (w, h) if w <= h else (h, w)
if size is None:
if not allow_size_none:
raise ValueError("This should never happen!!")
if not isinstance(max_size, int):
raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.")
new_short, new_long = int(max_size * short / long), max_size
new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
elif len(size) == 1: # specified size only for the smallest edge
requested_new_short = size if isinstance(size, int) else size[0] requested_new_short = size if isinstance(size, int) else size[0]
new_short, new_long = requested_new_short, int(requested_new_short * long / short) new_short, new_long = requested_new_short, int(requested_new_short * long / short)
if max_size is not None: if max_size is not None:
......
...@@ -75,11 +75,15 @@ class Resize(Transform): ...@@ -75,11 +75,15 @@ class Resize(Transform):
the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape.
Args: Args:
size (sequence or int): Desired output size. If size is a sequence like size (sequence, int, or None): Desired
(h, w), output size will be matched to this. If size is an int, output size.
smaller edge of the image will be matched to this number.
i.e, if height > width, then image will be rescaled to - If size is a sequence like (h, w), output size will be matched to this.
- If size is an int, smaller edge of the image will be matched to this
number. i.e, if height > width, then image will be rescaled to
(size * height / width, size). (size * height / width, size).
- If size is None, the output shape is determined by the ``max_size``
parameter.
.. note:: .. note::
In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
...@@ -89,13 +93,21 @@ class Resize(Transform): ...@@ -89,13 +93,21 @@ class Resize(Transform):
``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported. ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well. The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well.
max_size (int, optional): The maximum allowed for the longer edge of max_size (int, optional): The maximum allowed for the longer edge of
the resized image. If the longer edge of the image is greater the resized image.
- If ``size`` is an int: if the longer edge of the image is greater
than ``max_size`` after being resized according to ``size``, than ``max_size`` after being resized according to ``size``,
``size`` will be overruled so that the longer edge is equal to ``size`` will be overruled so that the longer edge is equal to
``max_size``. ``max_size``. As a result, the smaller edge may be shorter than
As a result, the smaller edge may be shorter than ``size``. This ``size``. This is only supported if ``size`` is an int (or a
is only supported if ``size`` is an int (or a sequence of length sequence of length 1 in torchscript mode).
1 in torchscript mode). - If ``size`` is None: the longer edge of the image will be matched
to max_size. i.e, if height > width, then image will be rescaled
to (max_size, max_size * width / height).
This should be left to ``None`` (default) when ``size`` is a
sequence.
antialias (bool, optional): Whether to apply antialiasing. antialias (bool, optional): Whether to apply antialiasing.
It only affects **tensors** with bilinear or bicubic modes and it is It only affects **tensors** with bilinear or bicubic modes and it is
ignored otherwise: on PIL images, antialiasing is always applied on ignored otherwise: on PIL images, antialiasing is always applied on
...@@ -120,7 +132,7 @@ class Resize(Transform): ...@@ -120,7 +132,7 @@ class Resize(Transform):
def __init__( def __init__(
self, self,
size: Union[int, Sequence[int]], size: Union[int, Sequence[int], None],
interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
max_size: Optional[int] = None, max_size: Optional[int] = None,
antialias: Optional[bool] = True, antialias: Optional[bool] = True,
...@@ -131,9 +143,12 @@ class Resize(Transform): ...@@ -131,9 +143,12 @@ class Resize(Transform):
size = [size] size = [size]
elif isinstance(size, Sequence) and len(size) in {1, 2}: elif isinstance(size, Sequence) and len(size) in {1, 2}:
size = list(size) size = list(size)
elif size is None:
if not isinstance(max_size, int):
raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.")
else: else:
raise ValueError( raise ValueError(
f"size can either be an integer or a sequence of one or two integers, but got {size} instead." f"size can be an integer, a sequence of one or two integers, or None, but got {size} instead."
) )
self.size = size self.size = size
......
...@@ -159,21 +159,21 @@ vflip = vertical_flip ...@@ -159,21 +159,21 @@ vflip = vertical_flip
def _compute_resized_output_size( def _compute_resized_output_size(
canvas_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None canvas_size: Tuple[int, int], size: Optional[List[int]], max_size: Optional[int] = None
) -> List[int]: ) -> List[int]:
if isinstance(size, int): if isinstance(size, int):
size = [size] size = [size]
elif max_size is not None and len(size) != 1: elif max_size is not None and size is not None and len(size) != 1:
raise ValueError( raise ValueError(
"max_size should only be passed if size specifies the length of the smaller edge, " "max_size should only be passed if size is None or specifies the length of the smaller edge, "
"i.e. size should be an int or a sequence of length 1 in torchscript mode." "i.e. size should be an int or a sequence of length 1 in torchscript mode."
) )
return __compute_resized_output_size(canvas_size, size=size, max_size=max_size) return __compute_resized_output_size(canvas_size, size=size, max_size=max_size, allow_size_none=True)
def resize( def resize(
inpt: torch.Tensor, inpt: torch.Tensor,
size: List[int], size: Optional[List[int]],
interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
max_size: Optional[int] = None, max_size: Optional[int] = None,
antialias: Optional[bool] = True, antialias: Optional[bool] = True,
...@@ -206,7 +206,7 @@ def _do_native_uint8_resize_on_cpu(interpolation: InterpolationMode) -> bool: ...@@ -206,7 +206,7 @@ def _do_native_uint8_resize_on_cpu(interpolation: InterpolationMode) -> bool:
@_register_kernel_internal(resize, tv_tensors.Image) @_register_kernel_internal(resize, tv_tensors.Image)
def resize_image( def resize_image(
image: torch.Tensor, image: torch.Tensor,
size: List[int], size: Optional[List[int]],
interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
max_size: Optional[int] = None, max_size: Optional[int] = None,
antialias: Optional[bool] = True, antialias: Optional[bool] = True,
...@@ -310,7 +310,7 @@ def __resize_image_pil_dispatch( ...@@ -310,7 +310,7 @@ def __resize_image_pil_dispatch(
return _resize_image_pil(image, size=size, interpolation=interpolation, max_size=max_size) return _resize_image_pil(image, size=size, interpolation=interpolation, max_size=max_size)
def resize_mask(mask: torch.Tensor, size: List[int], max_size: Optional[int] = None) -> torch.Tensor: def resize_mask(mask: torch.Tensor, size: Optional[List[int]], max_size: Optional[int] = None) -> torch.Tensor:
if mask.ndim < 3: if mask.ndim < 3:
mask = mask.unsqueeze(0) mask = mask.unsqueeze(0)
needs_squeeze = True needs_squeeze = True
...@@ -334,7 +334,10 @@ def _resize_mask_dispatch( ...@@ -334,7 +334,10 @@ def _resize_mask_dispatch(
def resize_bounding_boxes( def resize_bounding_boxes(
bounding_boxes: torch.Tensor, canvas_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None bounding_boxes: torch.Tensor,
canvas_size: Tuple[int, int],
size: Optional[List[int]],
max_size: Optional[int] = None,
) -> Tuple[torch.Tensor, Tuple[int, int]]: ) -> Tuple[torch.Tensor, Tuple[int, int]]:
old_height, old_width = canvas_size old_height, old_width = canvas_size
new_height, new_width = _compute_resized_output_size(canvas_size, size=size, max_size=max_size) new_height, new_width = _compute_resized_output_size(canvas_size, size=size, max_size=max_size)
...@@ -353,7 +356,7 @@ def resize_bounding_boxes( ...@@ -353,7 +356,7 @@ def resize_bounding_boxes(
@_register_kernel_internal(resize, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) @_register_kernel_internal(resize, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False)
def _resize_bounding_boxes_dispatch( def _resize_bounding_boxes_dispatch(
inpt: tv_tensors.BoundingBoxes, size: List[int], max_size: Optional[int] = None, **kwargs: Any inpt: tv_tensors.BoundingBoxes, size: Optional[List[int]], max_size: Optional[int] = None, **kwargs: Any
) -> tv_tensors.BoundingBoxes: ) -> tv_tensors.BoundingBoxes:
output, canvas_size = resize_bounding_boxes( output, canvas_size = resize_bounding_boxes(
inpt.as_subclass(torch.Tensor), inpt.canvas_size, size, max_size=max_size inpt.as_subclass(torch.Tensor), inpt.canvas_size, size, max_size=max_size
...@@ -364,7 +367,7 @@ def _resize_bounding_boxes_dispatch( ...@@ -364,7 +367,7 @@ def _resize_bounding_boxes_dispatch(
@_register_kernel_internal(resize, tv_tensors.Video) @_register_kernel_internal(resize, tv_tensors.Video)
def resize_video( def resize_video(
video: torch.Tensor, video: torch.Tensor,
size: List[int], size: Optional[List[int]],
interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
max_size: Optional[int] = None, max_size: Optional[int] = None,
antialias: Optional[bool] = True, antialias: Optional[bool] = True,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment