Unverified commit e13b8f5c authored by Philip Meier, committed by GitHub

port tests for F.pad and transforms.Pad (#7939)

parent e9f80941
@@ -390,21 +390,6 @@ def test_pure_tensor_heuristic(flat_inputs):
    assert transform.was_applied(output, input)


class TestPad:
    def test_assertions(self):
        with pytest.raises(TypeError, match="Got inappropriate padding arg"):
            transforms.Pad("abc")

        with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"):
            transforms.Pad([-0.7, 0, 0.7])

        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
            transforms.Pad(12, fill="abc")

        with pytest.raises(ValueError, match="Padding mode should be either"):
            transforms.Pad(12, padding_mode="abc")


class TestRandomZoomOut:
    def test_assertions(self):
        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
...
@@ -109,21 +109,6 @@ CONSISTENCY_CONFIGS = [
        ],
        make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, sizes=[(20, 19)]),
    ),
    ConsistencyConfig(
        v2_transforms.Pad,
        legacy_transforms.Pad,
        [
            NotScriptableArgsKwargs(3),
            ArgsKwargs([3]),
            ArgsKwargs([2, 3]),
            ArgsKwargs([3, 2, 1, 4]),
            NotScriptableArgsKwargs(5, fill=1, padding_mode="constant"),
            ArgsKwargs([5], fill=1, padding_mode="constant"),
            NotScriptableArgsKwargs(5, padding_mode="edge"),
            NotScriptableArgsKwargs(5, padding_mode="reflect"),
            NotScriptableArgsKwargs(5, padding_mode="symmetric"),
        ],
    ),
    *[
        ConsistencyConfig(
            v2_transforms.LinearTransformation,
...
@@ -524,75 +524,6 @@ class TestClampBoundingBoxes:
# `transforms_v2_kernel_infos.py`
def _parse_padding(padding):
    if isinstance(padding, int):
        return [padding] * 4
    if isinstance(padding, list):
        if len(padding) == 1:
            return padding * 4
        if len(padding) == 2:
            return padding * 2  # [left, up, right, down]
        return padding
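

# Illustrative expansion of the helper above: _parse_padding(1) == [1, 1, 1, 1] and
# _parse_padding([1, 2]) == [1, 2, 1, 2], i.e. [left, up, right, down].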


@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("padding", [[1], [1, 1], [1, 1, 2, 2]])
def test_correctness_pad_bounding_boxes(device, padding):
    def _compute_expected_bbox(bbox, format, padding_):
        pad_left, pad_up, _, _ = _parse_padding(padding_)

        dtype = bbox.dtype
        bbox = (
            bbox.clone()
            if format == tv_tensors.BoundingBoxFormat.XYXY
            else convert_bounding_box_format(bbox, old_format=format, new_format=tv_tensors.BoundingBoxFormat.XYXY)
        )

        bbox[0::2] += pad_left
        bbox[1::2] += pad_up

        bbox = convert_bounding_box_format(bbox, old_format=tv_tensors.BoundingBoxFormat.XYXY, new_format=format)
        if bbox.dtype != dtype:
            # Temporary cast to original dtype
            # e.g. float32 -> int
            bbox = bbox.to(dtype)
        return bbox

    def _compute_expected_canvas_size(bbox, padding_):
        pad_left, pad_up, pad_right, pad_down = _parse_padding(padding_)
        height, width = bbox.canvas_size
        return height + pad_up + pad_down, width + pad_left + pad_right

    for bboxes in make_multiple_bounding_boxes(extra_dims=((4,),)):
        bboxes = bboxes.to(device)
        bboxes_format = bboxes.format
        bboxes_canvas_size = bboxes.canvas_size

        output_boxes, output_canvas_size = F.pad_bounding_boxes(
            bboxes, format=bboxes_format, canvas_size=bboxes_canvas_size, padding=padding
        )

        torch.testing.assert_close(output_canvas_size, _compute_expected_canvas_size(bboxes, padding))

        expected_bboxes = torch.stack(
            [_compute_expected_bbox(b, bboxes_format, padding) for b in bboxes.reshape(-1, 4).unbind()]
        ).reshape(bboxes.shape)

        torch.testing.assert_close(output_boxes, expected_bboxes, atol=1, rtol=0)


@pytest.mark.parametrize("device", cpu_and_cuda())
def test_correctness_pad_segmentation_mask_on_fixed_input(device):
    mask = torch.ones((1, 3, 3), dtype=torch.long, device=device)

    out_mask = F.pad_mask(mask, padding=[1, 1, 1, 1])

    expected_mask = torch.zeros((1, 5, 5), dtype=torch.long, device=device)
    expected_mask[:, 1:-1, 1:-1] = 1
    torch.testing.assert_close(out_mask, expected_mask)


@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize(
    "startpoints, endpoints",
...
@@ -3346,3 +3346,171 @@ class TestResizedCrop:
        for param in ["scale", "ratio"]:
            with pytest.warns(match="Scale and ratio should be of kind"):
                transforms.RandomResizedCrop(size=self.INPUT_SIZE, **{param: [1, 0]})


class TestPad:
    EXHAUSTIVE_TYPE_PADDINGS = [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]]
    CORRECTNESS_PADDINGS = [
        padding
        for padding in EXHAUSTIVE_TYPE_PADDINGS
        if isinstance(padding, int) or (isinstance(padding, list) and len(padding) > 1)
    ]
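    # For the correctness tests one representative per geometry is enough: 1, (1,), and [1]
    # all pad identically, as do (1, 2) and [1, 2]. The full set of types is still exercised
    # in test_kernel_image below.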
    PADDING_MODES = ["constant", "symmetric", "edge", "reflect"]

    @param_value_parametrization(
        padding=EXHAUSTIVE_TYPE_PADDINGS,
        fill=EXHAUSTIVE_TYPE_FILLS,
        padding_mode=PADDING_MODES,
    )
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_kernel_image(self, param, value, dtype, device):
        if param == "fill":
            value = adapt_fill(value, dtype=dtype)
        kwargs = {param: value}
        if param != "padding":
            kwargs["padding"] = [1]

        image = make_image(dtype=dtype, device=device)

        check_kernel(
            F.pad_image,
            image,
            **kwargs,
            check_scripted_vs_eager=not (
                (param == "padding" and isinstance(value, int))
                # See https://github.com/pytorch/vision/pull/7252#issue-1585585521 for details
                or (
                    param == "fill"
                    and (
                        isinstance(value, tuple) or (isinstance(value, list) and any(isinstance(v, int) for v in value))
                    )
                )
            ),
        )

    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    def test_kernel_bounding_boxes(self, format):
        bounding_boxes = make_bounding_boxes(format=format)
        check_kernel(
            F.pad_bounding_boxes,
            bounding_boxes,
            format=bounding_boxes.format,
            canvas_size=bounding_boxes.canvas_size,
            padding=[1],
        )

    @pytest.mark.parametrize("padding_mode", ["symmetric", "edge", "reflect"])
    def test_kernel_bounding_boxes_errors(self, padding_mode):
        bounding_boxes = make_bounding_boxes()
        with pytest.raises(ValueError, match=f"'{padding_mode}' is not supported"):
            F.pad_bounding_boxes(
                bounding_boxes,
                format=bounding_boxes.format,
                canvas_size=bounding_boxes.canvas_size,
                padding=[1],
                padding_mode=padding_mode,
            )
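
    # Only "constant" padding is meaningful for bounding boxes: the other modes describe how
    # pixel values are filled in, which has no equivalent for box coordinates, so the kernel
    # rejects them outright.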

    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask])
    def test_kernel_mask(self, make_mask):
        check_kernel(F.pad_mask, make_mask(), padding=[1])

    @pytest.mark.parametrize("fill", [[1], (0,), [1, 0, 1], (0, 1, 0)])
    def test_kernel_mask_errors(self, fill):
        with pytest.raises(ValueError, match="Non-scalar fill value is not supported"):
            check_kernel(F.pad_mask, make_segmentation_mask(), padding=[1], fill=fill)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_functional(self, make_input):
        check_functional(F.pad, make_input(), padding=[1])

    @pytest.mark.parametrize(
        ("kernel", "input_type"),
        [
            (F.pad_image, torch.Tensor),
            # The PIL kernel uses fill=0 as default rather than fill=None as all others.
            # Since the whole fill story is already really inconsistent, we won't introduce yet another case to allow
            # for this test to pass.
            # See https://github.com/pytorch/vision/issues/6623 for a discussion.
            # (F._pad_image_pil, PIL.Image.Image),
            (F.pad_image, tv_tensors.Image),
            (F.pad_bounding_boxes, tv_tensors.BoundingBoxes),
            (F.pad_mask, tv_tensors.Mask),
            (F.pad_video, tv_tensors.Video),
        ],
    )
    def test_functional_signature(self, kernel, input_type):
        check_functional_kernel_signature_match(F.pad, kernel=kernel, input_type=input_type)

    @pytest.mark.parametrize(
        "make_input",
        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
    )
    def test_transform(self, make_input):
        check_transform(transforms.Pad(padding=[1]), make_input())

    def test_transform_errors(self):
        with pytest.raises(TypeError, match="Got inappropriate padding arg"):
            transforms.Pad("abc")

        with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"):
            transforms.Pad([-0.7, 0, 0.7])

        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
            transforms.Pad(12, fill="abc")

        with pytest.raises(ValueError, match="Padding mode should be either"):
            transforms.Pad(12, padding_mode="abc")

    @pytest.mark.parametrize("padding", CORRECTNESS_PADDINGS)
    @pytest.mark.parametrize(
        ("padding_mode", "fill"),
        [
            *[("constant", fill) for fill in CORRECTNESS_FILLS],
            *[(padding_mode, None) for padding_mode in ["symmetric", "edge", "reflect"]],
        ],
    )
    @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
    def test_image_correctness(self, padding, padding_mode, fill, fn):
        image = make_image(dtype=torch.uint8, device="cpu")

        actual = fn(image, padding=padding, padding_mode=padding_mode, fill=fill)
        expected = F.to_image(F.pad(F.to_pil_image(image), padding=padding, padding_mode=padding_mode, fill=fill))

        assert_equal(actual, expected)

    def _reference_pad_bounding_boxes(self, bounding_boxes, *, padding):
        if isinstance(padding, int):
            padding = [padding]
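        # Repeat the sequence until it has four entries: a 1-element list is repeated 4x, a
        # 2-element list 2x, and a 4-element list is taken as is, yielding
        # [left, top, right, bottom].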
        left, top, right, bottom = padding * (4 // len(padding))

        affine_matrix = np.array(
            [
                [1, 0, left],
                [0, 1, top],
            ],
        )

        height = bounding_boxes.canvas_size[0] + top + bottom
        width = bounding_boxes.canvas_size[1] + left + right

        return reference_affine_bounding_boxes_helper(
            bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=(height, width)
        )

    @pytest.mark.parametrize("padding", CORRECTNESS_PADDINGS)
    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
    @pytest.mark.parametrize("device", cpu_and_cuda())
    @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
    def test_bounding_boxes_correctness(self, padding, format, dtype, device, fn):
        bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device)

        actual = fn(bounding_boxes, padding=padding)
        expected = self._reference_pad_bounding_boxes(bounding_boxes, padding=padding)

        assert_equal(actual, expected)
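
    # Sanity check for the reference above (illustrative numbers): padding=[1, 2] expands to
    # left=right=1 and top=bottom=2, so every box is translated by (+1, +2) and an (H, W)
    # canvas grows to (H + 4, W + 2).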

import pytest
import torchvision.transforms.v2.functional as F
from torchvision import tv_tensors
from transforms_v2_kernel_infos import KERNEL_INFOS, pad_xfail_jit_fill_condition
from transforms_v2_kernel_infos import KERNEL_INFOS
from transforms_v2_legacy_utils import InfoBase, TestMark

__all__ = ["DispatcherInfo", "DISPATCHER_INFOS"]
@@ -111,20 +111,6 @@ multi_crop_skips.append(skip_dispatch_tv_tensor)

DISPATCHER_INFOS = [
    DispatcherInfo(
        F.pad,
        kernels={
            tv_tensors.Image: F.pad_image,
            tv_tensors.Video: F.pad_video,
            tv_tensors.BoundingBoxes: F.pad_bounding_boxes,
            tv_tensors.Mask: F.pad_mask,
        },
        pil_kernel_info=PILKernelInfo(F._pad_image_pil, kernel_name="pad_image_pil"),
        test_marks=[
            xfail_jit("F.pad only supports vector fills for list of floats", condition=pad_xfail_jit_fill_condition),
            xfail_jit_python_scalar_arg("padding"),
        ],
    ),
    DispatcherInfo(
        F.perspective,
        kernels={
...
import functools
import itertools

import numpy as np
import PIL.Image
import pytest
import torch.testing
import torchvision.transforms.v2.functional as F
from torchvision import tv_tensors
from torchvision.transforms._functional_tensor import _max_value as get_max_value, _parse_pad_padding
from torchvision.transforms._functional_tensor import _max_value as get_max_value
from transforms_v2_legacy_utils import (
from transforms_v2_legacy_utils import (  # noqa: F401
    ArgsKwargs,
    combinations_grid,
    DEFAULT_PORTRAIT_SPATIAL_SIZE,
@@ -183,211 +182,6 @@ def float32_vs_uint8_fill_adapter(other_args, kwargs):
    return other_args, dict(kwargs, fill=fill)


def reference_affine_bounding_boxes_helper(bounding_boxes, *, format, canvas_size, affine_matrix):
    def transform(bbox, affine_matrix_, format_, canvas_size_):
        # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1
        in_dtype = bbox.dtype
        if not torch.is_floating_point(bbox):
            bbox = bbox.float()
        bbox_xyxy = F.convert_bounding_box_format(
            bbox.as_subclass(torch.Tensor),
            old_format=format_,
            new_format=tv_tensors.BoundingBoxFormat.XYXY,
            inplace=True,
        )
        points = np.array(
            [
                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
            ]
        )
        transformed_points = np.matmul(points, affine_matrix_.T)
        out_bbox = torch.tensor(
            [
                np.min(transformed_points[:, 0]).item(),
                np.min(transformed_points[:, 1]).item(),
                np.max(transformed_points[:, 0]).item(),
                np.max(transformed_points[:, 1]).item(),
            ],
            dtype=bbox_xyxy.dtype,
        )
        out_bbox = F.convert_bounding_box_format(
            out_bbox, old_format=tv_tensors.BoundingBoxFormat.XYXY, new_format=format_, inplace=True
        )
        # It is important to clamp before casting, especially for CXCYWH format, dtype=int64
        out_bbox = F.clamp_bounding_boxes(out_bbox, format=format_, canvas_size=canvas_size_)
        out_bbox = out_bbox.to(dtype=in_dtype)
        return out_bbox

    return torch.stack(
        [transform(b, affine_matrix, format, canvas_size) for b in bounding_boxes.reshape(-1, 4).unbind()]
    ).reshape(bounding_boxes.shape)


_PAD_PARAMS = combinations_grid(
    padding=[[1], [1, 1], [1, 1, 2, 2]],
    padding_mode=["constant", "symmetric", "edge", "reflect"],
)
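# Assuming combinations_grid behaves like a keyword Cartesian product, this yields
# 3 paddings x 4 modes = 12 parameter dicts, e.g. dict(padding=[1], padding_mode="constant").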


def sample_inputs_pad_image_tensor():
    make_pad_image_loaders = functools.partial(
        make_image_loaders, sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32]
    )

    for image_loader, padding in itertools.product(
        make_pad_image_loaders(),
        [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]],
    ):
        yield ArgsKwargs(image_loader, padding=padding)

    for image_loader in make_pad_image_loaders():
        for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
            yield ArgsKwargs(image_loader, padding=[1], fill=fill)

    for image_loader, padding_mode in itertools.product(
        # We branch for non-constant padding and integer inputs
        make_pad_image_loaders(dtypes=[torch.uint8]),
        ["constant", "symmetric", "edge", "reflect"],
    ):
        yield ArgsKwargs(image_loader, padding=[1], padding_mode=padding_mode)

    # `torch.nn.functional.pad` does not support symmetric padding, and thus we have a custom implementation. Besides
    # negative padding, this is already handled by the inputs above.
    for image_loader in make_pad_image_loaders():
        yield ArgsKwargs(image_loader, padding=[-1], padding_mode="symmetric")


def reference_inputs_pad_image_tensor():
    for image_loader, params in itertools.product(
        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _PAD_PARAMS
    ):
        for fill in get_fills(
            num_channels=image_loader.num_channels,
            dtype=image_loader.dtype,
        ):
            # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
            if isinstance(fill, (list, tuple)):
                continue
            yield ArgsKwargs(image_loader, fill=fill, **params)


def sample_inputs_pad_bounding_boxes():
    for bounding_boxes_loader, padding in itertools.product(
        make_bounding_box_loaders(), [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]]
    ):
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
            canvas_size=bounding_boxes_loader.canvas_size,
            padding=padding,
            padding_mode="constant",
        )


def sample_inputs_pad_mask():
    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]):
        yield ArgsKwargs(mask_loader, padding=[1])


def reference_inputs_pad_mask():
    for mask_loader, fill, params in itertools.product(
        make_mask_loaders(num_objects=[1], extra_dims=[()]), [None, 127], _PAD_PARAMS
    ):
        yield ArgsKwargs(mask_loader, fill=fill, **params)


def sample_inputs_pad_video():
    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
        yield ArgsKwargs(video_loader, padding=[1])


def reference_pad_bounding_boxes(bounding_boxes, *, format, canvas_size, padding, padding_mode):
    left, right, top, bottom = _parse_pad_padding(padding)

    affine_matrix = np.array(
        [
            [1, 0, left],
            [0, 1, top],
        ],
        dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32",
    )

    height = canvas_size[0] + top + bottom
    width = canvas_size[1] + left + right

    expected_bboxes = reference_affine_bounding_boxes_helper(
        bounding_boxes, format=format, canvas_size=(height, width), affine_matrix=affine_matrix
    )
    return expected_bboxes, (height, width)


def reference_inputs_pad_bounding_boxes():
    for bounding_boxes_loader, padding in itertools.product(
        make_bounding_box_loaders(extra_dims=((), (4,))), [1, (1,), (1, 2), (1, 2, 3, 4), [1], [1, 2], [1, 2, 3, 4]]
    ):
        yield ArgsKwargs(
            bounding_boxes_loader,
            format=bounding_boxes_loader.format,
            canvas_size=bounding_boxes_loader.canvas_size,
            padding=padding,
            padding_mode="constant",
        )


def pad_xfail_jit_fill_condition(args_kwargs):
    fill = args_kwargs.kwargs.get("fill")
    if not isinstance(fill, (list, tuple)):
        return False
    elif isinstance(fill, tuple):
        return True
    else:  # isinstance(fill, list):
        return all(isinstance(f, int) for f in fill)
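

# In other words (summary of the predicate above): scalar or None fills are expected to
# script fine, any tuple fill is expected to fail under JIT, and a list fill fails only if
# it consists entirely of ints, since scripted F.pad accepts vector fills as List[float].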


KERNEL_INFOS.extend(
    [
        KernelInfo(
            F.pad_image,
            sample_inputs_fn=sample_inputs_pad_image_tensor,
            reference_fn=pil_reference_wrapper(F._pad_image_pil),
            reference_inputs_fn=reference_inputs_pad_image_tensor,
            float32_vs_uint8=float32_vs_uint8_fill_adapter,
            closeness_kwargs=float32_vs_uint8_pixel_difference(),
            test_marks=[
                xfail_jit_python_scalar_arg("padding"),
                xfail_jit(
                    "F.pad only supports vector fills for list of floats", condition=pad_xfail_jit_fill_condition
                ),
            ],
        ),
        KernelInfo(
            F.pad_bounding_boxes,
            sample_inputs_fn=sample_inputs_pad_bounding_boxes,
            reference_fn=reference_pad_bounding_boxes,
            reference_inputs_fn=reference_inputs_pad_bounding_boxes,
            test_marks=[
                xfail_jit_python_scalar_arg("padding"),
            ],
        ),
        KernelInfo(
            F.pad_mask,
            sample_inputs_fn=sample_inputs_pad_mask,
            reference_fn=pil_reference_wrapper(F._pad_image_pil),
            reference_inputs_fn=reference_inputs_pad_mask,
            float32_vs_uint8=float32_vs_uint8_fill_adapter,
        ),
        KernelInfo(
            F.pad_video,
            sample_inputs_fn=sample_inputs_pad_video,
        ),
    ]
)

_PERSPECTIVE_COEFFS = [
    [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
    [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
...