[PoC] refactor transforms v2 tests (#7562)

Co-authored-by: Nicolas Hug <nh.nicolas.hug@gmail.com> Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

[PoC] refactor transforms v2 tests (#7562)
Co-authored-by: Nicolas Hug <nh.nicolas.hug@gmail.com> Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
5178a2e2 · Philip Meier · GitHub · 17d50fc6 · 5178a2e2 · 5178a2e2
Unverified Commit 5178a2e2 authored Jun 21, 2023 by Philip Meier Committed by GitHub Jun 21, 2023
5 changed files
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -7,9 +7,11 @@ import itertools
 import os
 import pathlib
 import random
+import re
 import shutil
 import sys
 import tempfile
+import warnings
 from collections import defaultdict
 from subprocess import CalledProcessError, check_output, STDOUT
 from typing import Callable, Sequence, Tuple, Union
@@ -880,3 +882,23 @@ def assert_run_python_script(source_code):
            raise RuntimeError(f"script errored with output:\n{e.output.decode()}")
        if out != b"":
            raise AssertionError(out.decode())
+@contextlib.contextmanager
+def assert_no_warnings():
+    """Context manager that turns every warning emitted inside its body into an exception."""
+    # The name `catch_warnings` is a misnomer as the context manager does **not** catch any warnings, but rather scopes
+    # the warning filters. All changes that are made to the filters while in this context, will be reset upon exit.
+    with warnings.catch_warnings():
+        # With the "error" action any warning raised while the body runs surfaces as an exception and thus fails the test.
+        warnings.simplefilter("error")
+        yield
+@contextlib.contextmanager
+def ignore_jit_no_profile_information_warning():
+    """Context manager that silences the JIT "does not have profile information" `UserWarning` inside its body."""
+    # Calling a scripted object often triggers a warning like
+    # `UserWarning: operator() profile_node %$INT1 : int[] = prim::profile_ivalue($INT2) does not have profile information`
+    # with varying `INT1` and `INT2`. Since these are uninteresting for us and only clutter the test summary, we ignore
+    # them.
+    with warnings.catch_warnings():
+        # `message` is interpreted as a regular expression, so the literal prefix (it contains parentheses) is escaped.
+        warnings.filterwarnings("ignore", message=re.escape("operator() profile_node %"), category=UserWarning)
+        yield
--- a/test/test_transforms_v2.py
+++ b/test/test_transforms_v2.py
@@ -1711,8 +1711,6 @@ def test_antialias_warning():
    tensor_video = torch.randint(0, 256, size=(2, 3, 10, 10), dtype=torch.uint8)
    match = "The default value of the antialias parameter"
-    with pytest.warns(UserWarning, match=match):
-        transforms.Resize((20, 20))(tensor_img)
    with pytest.warns(UserWarning, match=match):
        transforms.RandomResizedCrop((20, 20))(tensor_img)
    with pytest.warns(UserWarning, match=match):
@@ -1722,18 +1720,6 @@ def test_antialias_warning():
    with pytest.warns(UserWarning, match=match):
        transforms.RandomResize(10, 20)(tensor_img)
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_img, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_video, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_video(tensor_video, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        datapoints.Image(tensor_img).resize((20, 20))
    with pytest.warns(UserWarning, match=match):
        datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20))
@@ -1744,27 +1730,17 @@ def test_antialias_warning():
    with warnings.catch_warnings():
        warnings.simplefilter("error")
-        transforms.Resize((20, 20))(pil_img)
        transforms.RandomResizedCrop((20, 20))(pil_img)
        transforms.ScaleJitter((20, 20))(pil_img)
        transforms.RandomShortestSize((20, 20))(pil_img)
        transforms.RandomResize(10, 20)(pil_img)
-        transforms.functional.resize(pil_img, (20, 20))
-        transforms.Resize((20, 20), antialias=True)(tensor_img)
        transforms.RandomResizedCrop((20, 20), antialias=True)(tensor_img)
        transforms.ScaleJitter((20, 20), antialias=True)(tensor_img)
        transforms.RandomShortestSize((20, 20), antialias=True)(tensor_img)
        transforms.RandomResize(10, 20, antialias=True)(tensor_img)
-        transforms.functional.resize(tensor_img, (20, 20), antialias=True)
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20), antialias=True)
-        transforms.functional.resize(tensor_video, (20, 20), antialias=True)
-        transforms.functional.resize_video(tensor_video, (20, 20), antialias=True)
-        datapoints.Image(tensor_img).resize((20, 20), antialias=True)
        datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
-        datapoints.Video(tensor_video).resize((20, 20), antialias=True)
        datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)

--- a/test/test_transforms_v2_refactored.py
+++ b/test/test_transforms_v2_refactored.py
+import contextlib
+import inspect
+import re
+from typing import get_type_hints
+from unittest import mock
+import numpy as np
+import PIL.Image
+import pytest
+import torch
+import torchvision.transforms.v2 as transforms
+from common_utils import (
+    assert_equal,
+    assert_no_warnings,
+    cache,
+    cpu_and_cuda,
+    ignore_jit_no_profile_information_warning,
+    make_bounding_box,
+    make_detection_mask,
+    make_image,
+    make_segmentation_mask,
+    make_video,
+)
+from torch.testing import assert_close
+from torchvision import datapoints
+from torchvision.transforms.functional import pil_modes_mapping
+from torchvision.transforms.v2 import functional as F
+def _to_tolerances(maybe_tolerance_dict):
+    if not isinstance(maybe_tolerance_dict, dict):
+        return dict(rtol=None, atol=None)
+    tolerances = dict(rtol=0, atol=0)
+    tolerances.update(maybe_tolerance_dict)
+    return tolerances
+def _check_kernel_cuda_vs_cpu(kernel, input, *args, rtol, atol, **kwargs):
+    """Checks if the kernel produces close results for inputs on GPU and CPU."""
+    # Without a CUDA input there is nothing to compare, so the check is silently skipped.
+    if input.device.type != "cuda":
+        return
+    # The kernel is called with a plain tensor view of the input rather than with the datapoint subclass.
+    input_cuda = input.as_subclass(torch.Tensor)
+    input_cpu = input_cuda.to("cpu")
+    actual = kernel(input_cuda, *args, **kwargs)
+    expected = kernel(input_cpu, *args, **kwargs)
+    # The two results live on different devices by construction, hence `check_device=False`.
+    assert_close(actual, expected, check_device=False, rtol=rtol, atol=atol)
+@cache
+def _script(fn):
+    """Returns `torch.jit.script(fn)` and reports any scripting failure as an `AssertionError`."""
+    # NOTE(review): `cache` comes from `common_utils`; presumably it memoizes the result per `fn` so that every callable
+    # is only scripted once -- confirm against its definition.
+    try:
+        return torch.jit.script(fn)
+    except Exception as error:
+        # Chaining with `from error` keeps the original scripting traceback visible above this message.
+        raise AssertionError(f"Trying to `torch.jit.script` '{fn.__name__}' raised the error above.") from error
+def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs):
+    """Checks if the kernel is scriptable and if the scripted output is close to the eager one."""
+    # The comparison is only performed for CPU inputs; for every other device this is a no-op.
+    if input.device.type != "cpu":
+        return
+    kernel_scripted = _script(kernel)
+    # Both the scripted and the eager kernel are fed a plain tensor instead of the datapoint subclass.
+    input = input.as_subclass(torch.Tensor)
+    with ignore_jit_no_profile_information_warning():
+        actual = kernel_scripted(input, *args, **kwargs)
+    expected = kernel(input, *args, **kwargs)
+    assert_close(actual, expected, rtol=rtol, atol=atol)
+def _check_kernel_batched_vs_unbatched(kernel, input, *args, rtol, atol, **kwargs):
+    """Checks if the kernel produces close results for batched and unbatched inputs."""
+    unbatched_input = input.as_subclass(torch.Tensor)
+    # Part 1: prepend one and two batch dimensions by repeating the input and compare the kernel output against the
+    # equally repeated output for the unbatched input.
+    for batch_dims in [(2,), (2, 1)]:
+        # Repeat only along the new leading dimensions; the original dimensions are kept as is.
+        repeats = [*batch_dims, *[1] * input.ndim]
+        actual = kernel(unbatched_input.repeat(repeats), *args, **kwargs)
+        expected = kernel(unbatched_input, *args, **kwargs)
+        # We can't directly call `.repeat()` on the output, since some kernel also return some additional metadata
+        if isinstance(expected, torch.Tensor):
+            expected = expected.repeat(repeats)
+        else:
+            tensor, *metadata = expected
+            expected = (tensor.repeat(repeats), *metadata)
+        assert_close(actual, expected, rtol=rtol, atol=atol)
+    # Part 2: batches that contain zero samples must pass through the kernel with their batch dimensions intact.
+    for degenerate_batch_dims in [(0,), (5, 0), (0, 5)]:
+        # The values are irrelevant since the tensor has no elements; only the shape is checked below.
+        degenerate_batched_input = torch.empty(
+            degenerate_batch_dims + input.shape, dtype=input.dtype, device=input.device
+        )
+        output = kernel(degenerate_batched_input, *args, **kwargs)
+        # Most kernels just return a tensor, but some also return some additional metadata
+        if not isinstance(output, torch.Tensor):
+            output, *_ = output
+        assert output.shape[: -input.ndim] == degenerate_batch_dims
+def check_kernel(
+    kernel,
+    input,
+    *args,
+    check_cuda_vs_cpu=True,
+    check_scripted_vs_eager=True,
+    check_batched_vs_unbatched=True,
+    **kwargs,
+):
+    """Runs the common checks for a kernel: no in-place modification of the input, dtype and device are preserved,
+    and, unless disabled, the CUDA vs. CPU, scripted vs. eager, and batched vs. unbatched comparisons.
+    Each `check_*` flag can be falsy to skip the check, truthy to run it, or a dictionary with `rtol` and / or `atol`
+    to run it with custom tolerances. `*args` and `**kwargs` are forwarded to the kernel.
+    """
+    # The version counter of a tensor is bumped by in-place operations, so it is recorded before calling the kernel.
+    initial_input_version = input._version
+    output = kernel(input.as_subclass(torch.Tensor), *args, **kwargs)
+    # Most kernels just return a tensor, but some also return some additional metadata
+    if not isinstance(output, torch.Tensor):
+        output, *_ = output
+    # check that no inplace operation happened
+    assert input._version == initial_input_version
+    assert output.dtype == input.dtype
+    assert output.device == input.device
+    if check_cuda_vs_cpu:
+        _check_kernel_cuda_vs_cpu(kernel, input, *args, **kwargs, **_to_tolerances(check_cuda_vs_cpu))
+    if check_scripted_vs_eager:
+        _check_kernel_scripted_vs_eager(kernel, input, *args, **kwargs, **_to_tolerances(check_scripted_vs_eager))
+    if check_batched_vs_unbatched:
+        _check_kernel_batched_vs_unbatched(kernel, input, *args, **kwargs, **_to_tolerances(check_batched_vs_unbatched))
+def _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs):
+    """Checks if the dispatcher can be scripted and the scripted version can be called without error."""
+    # The smoke test is only run once per parametrization, namely for `datapoints.Image` inputs.
+    if not isinstance(input, datapoints.Image):
+        return
+    dispatcher_scripted = _script(dispatcher)
+    with ignore_jit_no_profile_information_warning():
+        # The scripted dispatcher is called with a plain tensor; its output is not inspected.
+        dispatcher_scripted(input.as_subclass(torch.Tensor), *args, **kwargs)
+def _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs):
+    """Checks if the dispatcher correctly dispatches the input to the corresponding kernel and that the input type is
+    preserved in doing so. For bounding boxes also checks that the format is preserved.
+    """
+    if isinstance(input, datapoints._datapoint.Datapoint):
+        # Due to our complex dispatch architecture for datapoints, we cannot spy on the kernel directly,
+        # but rather have to patch the `Datapoint.__F` attribute to contain the spied on kernel.
+        # `wraps=kernel` makes the mock forward every call to the real kernel while recording it.
+        spy = mock.MagicMock(wraps=kernel)
+        with mock.patch.object(F, kernel.__name__, spy):
+            # Due to Python's name mangling, the `Datapoint.__F` attribute is only accessible from inside the class.
+            # Since that is not the case here, we need to prefix f"_{cls.__name__}"
+            # See https://docs.python.org/3/tutorial/classes.html#private-variables for details
+            with mock.patch.object(datapoints._datapoint.Datapoint, "_Datapoint__F", new=F):
+                output = dispatcher(input, *args, **kwargs)
+        spy.assert_called_once()
+    else:
+        # For all other inputs the kernel is patched under its name in the module that defines the dispatcher.
+        with mock.patch(f"{dispatcher.__module__}.{kernel.__name__}", wraps=kernel) as spy:
+            output = dispatcher(input, *args, **kwargs)
+            spy.assert_called_once()
+    assert isinstance(output, type(input))
+    if isinstance(input, datapoints.BoundingBox):
+        assert output.format == input.format
+def check_dispatcher(
+    dispatcher,
+    kernel,
+    input,
+    *args,
+    check_scripted_smoke=True,
+    check_dispatch=True,
+    **kwargs,
+):
+    """Runs the common checks for a dispatcher: the API usage is logged under the dispatcher's qualified name, an input
+    of unknown type is rejected with a `TypeError` that mentions the type, and, unless disabled, the scripted smoke
+    test and the dispatch check. `*args` and `**kwargs` are forwarded to the dispatcher.
+    """
+    # `wraps=` keeps the real logging function working while recording the calls made to it.
+    with mock.patch("torch._C._log_api_usage_once", wraps=torch._C._log_api_usage_once) as spy:
+        dispatcher(input, *args, **kwargs)
+        spy.assert_any_call(f"{dispatcher.__module__}.{dispatcher.__name__}")
+    # A bare `object()` is a type that no dispatcher knows how to handle.
+    unknown_input = object()
+    with pytest.raises(TypeError, match=re.escape(str(type(unknown_input)))):
+        dispatcher(unknown_input, *args, **kwargs)
+    if check_scripted_smoke:
+        _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs)
+    if check_dispatch:
+        _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs)
+def _check_dispatcher_kernel_signature_match(dispatcher, *, kernel, input_type):
+    """Checks if the signature of the dispatcher matches the kernel signature."""
+    dispatcher_signature = inspect.signature(dispatcher)
+    dispatcher_params = list(dispatcher_signature.parameters.values())[1:]
+    kernel_signature = inspect.signature(kernel)
+    kernel_params = list(kernel_signature.parameters.values())[1:]
+    if issubclass(input_type, datapoints._datapoint.Datapoint):
+        # We filter out metadata that is implicitly passed to the dispatcher through the input datapoint, but has to be
+        # explicitly passed to the kernel.
+        kernel_params = [param for param in kernel_params if param.name not in input_type.__annotations__.keys()]
+    dispatcher_params = iter(dispatcher_params)
+    for dispatcher_param, kernel_param in zip(dispatcher_params, kernel_params):
+        try:
+            # In general, the dispatcher parameters are a superset of the kernel parameters. Thus, we filter out
+            # dispatcher parameters that have no kernel equivalent while keeping the order intact.
+            while dispatcher_param.name != kernel_param.name:
+                dispatcher_param = next(dispatcher_params)
+        except StopIteration:
+            raise AssertionError(
+                f"Parameter `{kernel_param.name}` of kernel `{kernel.__name__}` "
+                f"has no corresponding parameter on the dispatcher `{dispatcher.__name__}`."
+            ) from None
+        if issubclass(input_type, PIL.Image.Image):
+            # PIL kernels often have more correct annotations, since they are not limited by JIT. Thus, we don't check
+            # them in the first place.
+            dispatcher_param._annotation = kernel_param._annotation = inspect.Parameter.empty
+        assert dispatcher_param == kernel_param
+def _check_dispatcher_datapoint_signature_match(dispatcher):
+    """Checks if the signature of the dispatcher matches the corresponding method signature on the Datapoint class."""
+    dispatcher_signature = inspect.signature(dispatcher)
+    # The leading parameter is dropped on both sides before comparing.
+    # NOTE(review): presumably that is the input for the dispatcher and `self` for the method -- confirm.
+    dispatcher_params = list(dispatcher_signature.parameters.values())[1:]
+    # The method is looked up on the `Datapoint` base class under the same name as the dispatcher.
+    datapoint_method = getattr(datapoints._datapoint.Datapoint, dispatcher.__name__)
+    datapoint_signature = inspect.signature(datapoint_method)
+    datapoint_params = list(datapoint_signature.parameters.values())[1:]
+    # Some annotations in the `datapoints._datapoint` module
+    # are stored as strings. The block below makes them concrete again (non-strings), so they can be compared to the
+    # natively concrete dispatcher annotations.
+    datapoint_annotations = get_type_hints(datapoint_method)
+    for param in datapoint_params:
+        param._annotation = datapoint_annotations[param.name]
+    assert dispatcher_params == datapoint_params
+def check_dispatcher_signatures_match(dispatcher, *, kernel, input_type):
+    """Checks the dispatcher signature against the signature of the kernel for `input_type` as well as against the
+    signature of the equally named method on the `Datapoint` class."""
+    _check_dispatcher_kernel_signature_match(dispatcher, kernel=kernel, input_type=input_type)
+    _check_dispatcher_datapoint_signature_match(dispatcher)
+def _check_transform_v1_compatibility(transform, input):
+    """If the transform defines the ``_v1_transform_cls`` attribute, checks if the transform has a public, static
+    ``get_params`` method, is scriptable, and the scripted version can be called without error."""
+    if not hasattr(transform, "_v1_transform_cls"):
+        return
+    # The check only runs for plain tensors; the exact type comparison excludes all tensor subclasses.
+    if type(input) is not torch.Tensor:
+        return
+    if hasattr(transform._v1_transform_cls, "get_params"):
+        # The v2 class has to expose the very same function object as the v1 class.
+        assert type(transform).get_params is transform._v1_transform_cls.get_params
+    scripted_transform = _script(transform)
+    with ignore_jit_no_profile_information_warning():
+        scripted_transform(input)
+def check_transform(transform_cls, input, *args, **kwargs):
+    """Instantiates `transform_cls(*args, **kwargs)`, applies it to `input` and checks that the input type (and for
+    bounding boxes also the format) is preserved. Afterwards runs the v1 compatibility check."""
+    transform = transform_cls(*args, **kwargs)
+    output = transform(input)
+    assert isinstance(output, type(input))
+    if isinstance(input, datapoints.BoundingBox):
+        assert output.format == input.format
+    _check_transform_v1_compatibility(transform, input)
+def transform_cls_to_functional(transform_cls):
+    def wrapper(input, *args, **kwargs):
+        transform = transform_cls(*args, **kwargs)
+        return transform(input)
+    wrapper.__name__ = transform_cls.__name__
+    return wrapper
+# Interpolation modes that the tests treat as supported by both the tensor and the PIL backend.
+# We cannot use `list(transforms.InterpolationMode)` here, since it includes some PIL-only ones as well
+INTERPOLATION_MODES = [
+    transforms.InterpolationMode.NEAREST,
+    transforms.InterpolationMode.NEAREST_EXACT,
+    transforms.InterpolationMode.BILINEAR,
+    transforms.InterpolationMode.BICUBIC,
+]
+@contextlib.contextmanager
+def assert_warns_antialias_default_value():
+    """Context manager that asserts that the `UserWarning` about the default value of `antialias` is emitted inside
+    its body."""
+    with pytest.warns(UserWarning, match="The default value of the antialias parameter of all the resizing transforms"):
+        yield
+def reference_affine_bounding_box_helper(bounding_box, *, format, spatial_size, affine_matrix):
+    def transform(bbox, affine_matrix_, format_, spatial_size_):
+        # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1
+        in_dtype = bbox.dtype
+        if not torch.is_floating_point(bbox):
+            bbox = bbox.float()
+        bbox_xyxy = F.convert_format_bounding_box(
+            bbox.as_subclass(torch.Tensor),
+            old_format=format_,
+            new_format=datapoints.BoundingBoxFormat.XYXY,
+            inplace=True,
+        )
+        points = np.array(
+            [
+                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
+            ]
+        )
+        transformed_points = np.matmul(points, affine_matrix_.T)
+        out_bbox = torch.tensor(
+            [
+                np.min(transformed_points[:, 0]).item(),
+                np.min(transformed_points[:, 1]).item(),
+                np.max(transformed_points[:, 0]).item(),
+                np.max(transformed_points[:, 1]).item(),
+            ],
+            dtype=bbox_xyxy.dtype,
+        )
+        out_bbox = F.convert_format_bounding_box(
+            out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format_, inplace=True
+        )
+        # It is important to clamp before casting, especially for CXCYWH format, dtype=int64
+        out_bbox = F.clamp_bounding_box(out_bbox, format=format_, spatial_size=spatial_size_)
+        out_bbox = out_bbox.to(dtype=in_dtype)
+        return out_bbox
+    if bounding_box.ndim < 2:
+        bounding_box = [bounding_box]
+    expected_bboxes = [transform(bbox, affine_matrix, format, spatial_size) for bbox in bounding_box]
+    if len(expected_bboxes) > 1:
+        expected_bboxes = torch.stack(expected_bboxes)
+    else:
+        expected_bboxes = expected_bboxes[0]
+    return expected_bboxes
+class TestResize:
+    INPUT_SIZE = (17, 11)
+    OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)]
+    def _make_max_size_kwarg(self, *, use_max_size, size):
+        if use_max_size:
+            if not (isinstance(size, int) or len(size) == 1):
+                # This would result in an `ValueError`
+                return None
+            max_size = (size if isinstance(size, int) else size[0]) + 1
+        else:
+            max_size = None
+        return dict(max_size=max_size)
+    def _make_input(self, input_type, *, dtype=None, device="cpu", **kwargs):
+        if input_type in {torch.Tensor, PIL.Image.Image, datapoints.Image}:
+            input = make_image(size=self.INPUT_SIZE, dtype=dtype or torch.uint8, device=device, **kwargs)
+            if input_type is torch.Tensor:
+                input = input.as_subclass(torch.Tensor)
+            elif input_type is PIL.Image.Image:
+                input = F.to_image_pil(input)
+        elif input_type is datapoints.BoundingBox:
+            kwargs.setdefault("format", datapoints.BoundingBoxFormat.XYXY)
+            input = make_bounding_box(
+                spatial_size=self.INPUT_SIZE,
+                dtype=dtype or torch.float32,
+                device=device,
+                **kwargs,
+            )
+        elif input_type is datapoints.Mask:
+            input = make_segmentation_mask(size=self.INPUT_SIZE, dtype=dtype or torch.uint8, device=device, **kwargs)
+        elif input_type is datapoints.Video:
+            input = make_video(size=self.INPUT_SIZE, dtype=dtype or torch.uint8, device=device, **kwargs)
+        return input
+    def _compute_output_size(self, *, input_size, size, max_size):
+        if not (isinstance(size, int) or len(size) == 1):
+            return tuple(size)
+        if not isinstance(size, int):
+            size = size[0]
+        old_height, old_width = input_size
+        ratio = old_width / old_height
+        if ratio > 1:
+            new_height = size
+            new_width = int(ratio * new_height)
+        else:
+            new_width = size
+            new_height = int(new_width / ratio)
+        if max_size is not None and max(new_height, new_width) > max_size:
+            # Need to recompute the aspect ratio, since it might have changed due to rounding
+            ratio = new_width / new_height
+            if ratio > 1:
+                new_width = max_size
+                new_height = int(new_width / ratio)
+            else:
+                new_height = max_size
+                new_width = int(new_height * ratio)
+        return new_height, new_width
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("antialias", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+        # In contrast to CPU, there is no native `InterpolationMode.BICUBIC` implementation for uint8 images on CUDA.
+        # Internally, it uses the float path. Thus, we need to test with an enormous tolerance here to account for that.
+        atol = 30 if transforms.InterpolationMode.BICUBIC and dtype is torch.uint8 else 1
+        check_cuda_vs_cpu_tolerances = dict(rtol=0, atol=atol / 255 if dtype.is_floating_point else atol)
+        check_kernel(
+            F.resize_image_tensor,
+            self._make_input(datapoints.Image, dtype=dtype, device=device),
+            size=size,
+            interpolation=interpolation,
+            **max_size_kwarg,
+            antialias=antialias,
+            check_cuda_vs_cpu=check_cuda_vs_cpu_tolerances,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+        bounding_box = self._make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format)
+        check_kernel(
+            F.resize_bounding_box,
+            bounding_box,
+            spatial_size=bounding_box.spatial_size,
+            size=size,
+            **max_size_kwarg,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+    @pytest.mark.parametrize(
+        "dtype_and_make_mask", [(torch.uint8, make_segmentation_mask), (torch.bool, make_detection_mask)]
+    )
+    def test_kernel_mask(self, dtype_and_make_mask):
+        dtype, make_mask = dtype_and_make_mask
+        check_kernel(F.resize_mask, make_mask(dtype=dtype), size=self.OUTPUT_SIZES[-1])
+    def test_kernel_video(self):
+        check_kernel(F.resize_video, self._make_input(datapoints.Video), size=self.OUTPUT_SIZES[-1], antialias=True)
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize(
+        "input_type_and_kernel",
+        [
+            (torch.Tensor, F.resize_image_tensor),
+            (PIL.Image.Image, F.resize_image_pil),
+            (datapoints.Image, F.resize_image_tensor),
+            (datapoints.BoundingBox, F.resize_bounding_box),
+            (datapoints.Mask, F.resize_mask),
+            (datapoints.Video, F.resize_video),
+        ],
+    )
+    def test_dispatcher(self, size, input_type_and_kernel):
+        input_type, kernel = input_type_and_kernel
+        check_dispatcher(
+            F.resize,
+            kernel,
+            self._make_input(input_type),
+            size=size,
+            antialias=True,
+            check_scripted_smoke=not isinstance(size, int),
+        )
+    @pytest.mark.parametrize(
+        ("input_type", "kernel"),
+        [
+            (torch.Tensor, F.resize_image_tensor),
+            (PIL.Image.Image, F.resize_image_pil),
+            (datapoints.Image, F.resize_image_tensor),
+            (datapoints.BoundingBox, F.resize_bounding_box),
+            (datapoints.Mask, F.resize_mask),
+            (datapoints.Video, F.resize_video),
+        ],
+    )
+    def test_dispatcher_signature(self, kernel, input_type):
+        check_dispatcher_signatures_match(F.resize, kernel=kernel, input_type=input_type)
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
+    )
+    def test_transform(self, size, device, input_type):
+        input = self._make_input(input_type, device=device)
+        check_transform(
+            transforms.Resize,
+            input,
+            size=size,
+            antialias=True,
+        )
+    def _check_output_size(self, input, output, *, size, max_size):
+        assert tuple(F.get_spatial_size(output)) == self._compute_output_size(
+            input_size=F.get_spatial_size(input), size=size, max_size=max_size
+        )
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2.
+    # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT`
+    @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST})
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
+    def test_image_correctness(self, size, interpolation, use_max_size, fn):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+        image = self._make_input(torch.Tensor, dtype=torch.uint8, device="cpu")
+        actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True)
+        expected = F.to_image_tensor(
+            F.resize(F.to_image_pil(image), size=size, interpolation=interpolation, **max_size_kwarg)
+        )
+        self._check_output_size(image, actual, size=size, **max_size_kwarg)
+        torch.testing.assert_close(actual, expected, atol=1, rtol=0)
+    def _reference_resize_bounding_box(self, bounding_box, *, size, max_size=None):
+        old_height, old_width = bounding_box.spatial_size
+        new_height, new_width = self._compute_output_size(
+            input_size=bounding_box.spatial_size, size=size, max_size=max_size
+        )
+        if (old_height, old_width) == (new_height, new_width):
+            return bounding_box
+        affine_matrix = np.array(
+            [
+                [new_width / old_width, 0, 0],
+                [0, new_height / old_height, 0],
+            ],
+            dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
+        )
+        expected_bboxes = reference_affine_bounding_box_helper(
+            bounding_box,
+            format=bounding_box.format,
+            spatial_size=(new_height, new_width),
+            affine_matrix=affine_matrix,
+        )
+        return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes, spatial_size=(new_height, new_width))
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
+    def test_bounding_box_correctness(self, format, size, use_max_size, fn):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+        bounding_box = self._make_input(datapoints.BoundingBox)
+        actual = fn(bounding_box, size=size, **max_size_kwarg)
+        expected = self._reference_resize_bounding_box(bounding_box, size=size, **max_size_kwarg)
+        self._check_output_size(bounding_box, actual, size=size, **max_size_kwarg)
+        torch.testing.assert_close(actual, expected)
+    @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES))
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video],
+    )
+    def test_pil_interpolation_compat_smoke(self, interpolation, input_type):
+        input = self._make_input(input_type)
+        with (
+            contextlib.nullcontext()
+            if isinstance(input, PIL.Image.Image)
+            # This error is triggered in PyTorch core
+            else pytest.raises(NotImplementedError, match=f"got {interpolation.value.lower()}")
+        ):
+            F.resize(
+                input,
+                size=self.OUTPUT_SIZES[0],
+                interpolation=interpolation,
+            )
+    def test_dispatcher_pil_antialias_warning(self):
+        with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"):
+            F.resize(self._make_input(PIL.Image.Image), size=self.OUTPUT_SIZES[0], antialias=False)
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
+    )
+    def test_max_size_error(self, size, input_type):
+        if isinstance(size, int) or len(size) == 1:
+            max_size = (size if isinstance(size, int) else size[0]) - 1
+            match = "must be strictly greater than the requested size"
+        else:
+            # value can be anything other than None
+            max_size = -1
+            match = "size should be an int or a sequence of length 1"
+        with pytest.raises(ValueError, match=match):
+            F.resize(self._make_input(input_type), size=size, max_size=max_size, antialias=True)
+    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, datapoints.Image, datapoints.Video],
+    )
+    def test_antialias_warning(self, interpolation, input_type):
+        with (
+            assert_warns_antialias_default_value()
+            if interpolation in {transforms.InterpolationMode.BILINEAR, transforms.InterpolationMode.BICUBIC}
+            else assert_no_warnings()
+        ):
+            F.resize(self._make_input(input_type), size=self.OUTPUT_SIZES[0], interpolation=interpolation)
+    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video],
+    )
+    def test_interpolation_int(self, interpolation, input_type):
+        # `InterpolationMode.NEAREST_EXACT` has no proper corresponding integer equivalent. Internally, we map it to
+        # `0` to be the same as `InterpolationMode.NEAREST` for PIL. However, for the tensor backend there is a
+        # difference and thus we don't test it here.
+        if issubclass(input_type, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT:
+            return
+        input = self._make_input(input_type)
+        expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True)
+        actual = F.resize(
+            input, size=self.OUTPUT_SIZES[0], interpolation=pil_modes_mapping[interpolation], antialias=True
+        )
+        assert_equal(actual, expected)
+    def test_transform_unknown_size_error(self):
+        with pytest.raises(ValueError, match="size can either be an integer or a list or tuple of one or two integers"):
+            transforms.Resize(size=object())
+    @pytest.mark.parametrize(
+        "size", [min(INPUT_SIZE), [min(INPUT_SIZE)], (min(INPUT_SIZE),), list(INPUT_SIZE), tuple(INPUT_SIZE)]
+    )
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
+    )
+    def test_noop(self, size, input_type):
+        input = self._make_input(input_type)
+        output = F.resize(input, size=size, antialias=True)
+        # This identity check is not a requirement. It is here to avoid breaking the behavior by accident. If there
+        # is a good reason to break this, feel free to downgrade to an equality check.
+        if isinstance(input, datapoints._datapoint.Datapoint):
+            # We can't test identity directly, since that checks for the identity of the Python object. Since all
+            # datapoints unwrap before a kernel and wrap again afterwards, the Python object changes. Thus, we check
+            # that the underlying storage is the same
+            assert output.data_ptr() == input.data_ptr()
+        else:
+            assert output is input
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video],
+    )
+    def test_no_regression_5405(self, input_type):
+        # Checks that `max_size` is not ignored if `size == small_edge_size`
+        # See https://github.com/pytorch/vision/issues/5405
+        input = self._make_input(input_type)
+        size = min(F.get_spatial_size(input))
+        max_size = size + 1
+        output = F.resize(input, size=size, max_size=max_size, antialias=True)
+        assert max(F.get_spatial_size(output)) == max_size
--- a/test/transforms_v2_dispatcher_infos.py
+++ b/test/transforms_v2_dispatcher_infos.py
@@ -148,19 +148,6 @@ DISPATCHER_INFOS = [
        },
        pil_kernel_info=PILKernelInfo(F.horizontal_flip_image_pil, kernel_name="horizontal_flip_image_pil"),
    ),
-    DispatcherInfo(
-        F.resize,
-        kernels={
-            datapoints.Image: F.resize_image_tensor,
-            datapoints.Video: F.resize_video,
-            datapoints.BoundingBox: F.resize_bounding_box,
-            datapoints.Mask: F.resize_mask,
-        },
-        pil_kernel_info=PILKernelInfo(F.resize_image_pil),
-        test_marks=[
-            xfail_jit_python_scalar_arg("size"),
-        ],
-    ),
    DispatcherInfo(
        F.affine,
        kernels={

--- a/test/transforms_v2_kernel_infos.py
+++ b/test/transforms_v2_kernel_infos.py
@@ -238,179 +238,6 @@ KERNEL_INFOS.extend(
 )
-def _get_resize_sizes(spatial_size):
-    height, width = spatial_size
-    length = max(spatial_size)
-    yield length
-    yield [length]
-    yield (length,)
-    new_height = int(height * 0.75)
-    new_width = int(width * 1.25)
-    yield [new_height, new_width]
-    yield height, width
-def sample_inputs_resize_image_tensor():
-    for image_loader in make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]):
-        for size in _get_resize_sizes(image_loader.spatial_size):
-            yield ArgsKwargs(image_loader, size=size)
-    for image_loader, interpolation in itertools.product(
-        make_image_loaders(sizes=["random"], color_spaces=["RGB"]),
-        [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR],
-    ):
-        yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation)
-    yield ArgsKwargs(make_image_loader(size=(11, 17)), size=20, max_size=25)
-def sample_inputs_resize_image_tensor_bicubic():
-    for image_loader, interpolation in itertools.product(
-        make_image_loaders(sizes=["random"], color_spaces=["RGB"]), [F.InterpolationMode.BICUBIC]
-    ):
-        yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation)
-@pil_reference_wrapper
-def reference_resize_image_tensor(*args, **kwargs):
-    if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in {
-        F.InterpolationMode.BILINEAR,
-        F.InterpolationMode.BICUBIC,
-    }:
-        raise pytest.UsageError("Anti-aliasing is always active in PIL")
-    return F.resize_image_pil(*args, **kwargs)
-def reference_inputs_resize_image_tensor():
-    for image_loader, interpolation in itertools.product(
-        make_image_loaders_for_interpolation(),
-        [
-            F.InterpolationMode.NEAREST,
-            F.InterpolationMode.NEAREST_EXACT,
-            F.InterpolationMode.BILINEAR,
-            F.InterpolationMode.BICUBIC,
-        ],
-    ):
-        for size in _get_resize_sizes(image_loader.spatial_size):
-            yield ArgsKwargs(
-                image_loader,
-                size=size,
-                interpolation=interpolation,
-                antialias=interpolation
-                in {
-                    F.InterpolationMode.BILINEAR,
-                    F.InterpolationMode.BICUBIC,
-                },
-            )
-def sample_inputs_resize_bounding_box():
-    for bounding_box_loader in make_bounding_box_loaders():
-        for size in _get_resize_sizes(bounding_box_loader.spatial_size):
-            yield ArgsKwargs(bounding_box_loader, spatial_size=bounding_box_loader.spatial_size, size=size)
-def sample_inputs_resize_mask():
-    for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]):
-        yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1])
-def sample_inputs_resize_video():
-    for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
-        yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1])
-def reference_resize_bounding_box(bounding_box, *, spatial_size, size, max_size=None):
-    old_height, old_width = spatial_size
-    new_height, new_width = F._geometry._compute_resized_output_size(spatial_size, size=size, max_size=max_size)
-    if (old_height, old_width) == (new_height, new_width):
-        return bounding_box, (old_height, old_width)
-    affine_matrix = np.array(
-        [
-            [new_width / old_width, 0, 0],
-            [0, new_height / old_height, 0],
-        ],
-        dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
-    )
-    expected_bboxes = reference_affine_bounding_box_helper(
-        bounding_box,
-        format=bounding_box.format,
-        spatial_size=(new_height, new_width),
-        affine_matrix=affine_matrix,
-    )
-    return expected_bboxes, (new_height, new_width)
-def reference_inputs_resize_bounding_box():
-    for bounding_box_loader in make_bounding_box_loaders(extra_dims=((), (4,))):
-        for size in _get_resize_sizes(bounding_box_loader.spatial_size):
-            yield ArgsKwargs(bounding_box_loader, size=size, spatial_size=bounding_box_loader.spatial_size)
-KERNEL_INFOS.extend(
-    [
-        KernelInfo(
-            F.resize_image_tensor,
-            sample_inputs_fn=sample_inputs_resize_image_tensor,
-            reference_fn=reference_resize_image_tensor,
-            reference_inputs_fn=reference_inputs_resize_image_tensor,
-            float32_vs_uint8=True,
-            closeness_kwargs={
-                **pil_reference_pixel_difference(10, mae=True),
-                **cuda_vs_cpu_pixel_difference(),
-                **float32_vs_uint8_pixel_difference(1, mae=True),
-            },
-            test_marks=[
-                xfail_jit_python_scalar_arg("size"),
-            ],
-        ),
-        KernelInfo(
-            F.resize_image_tensor,
-            sample_inputs_fn=sample_inputs_resize_image_tensor_bicubic,
-            reference_fn=reference_resize_image_tensor,
-            reference_inputs_fn=reference_inputs_resize_image_tensor,
-            float32_vs_uint8=True,
-            closeness_kwargs={
-                **pil_reference_pixel_difference(10, mae=True),
-                **cuda_vs_cpu_pixel_difference(atol=30),
-                **float32_vs_uint8_pixel_difference(1, mae=True),
-            },
-            test_marks=[
-                xfail_jit_python_scalar_arg("size"),
-            ],
-        ),
-        KernelInfo(
-            F.resize_bounding_box,
-            sample_inputs_fn=sample_inputs_resize_bounding_box,
-            reference_fn=reference_resize_bounding_box,
-            reference_inputs_fn=reference_inputs_resize_bounding_box,
-            closeness_kwargs={
-                (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0),
-            },
-            test_marks=[
-                xfail_jit_python_scalar_arg("size"),
-            ],
-        ),
-        KernelInfo(
-            F.resize_mask,
-            sample_inputs_fn=sample_inputs_resize_mask,
-            closeness_kwargs=pil_reference_pixel_difference(10),
-            test_marks=[
-                xfail_jit_python_scalar_arg("size"),
-            ],
-        ),
-        KernelInfo(
-            F.resize_video,
-            sample_inputs_fn=sample_inputs_resize_video,
-            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
-        ),
-    ]
-)
 _AFFINE_KWARGS = combinations_grid(
    angle=[-87, 15, 90],
    translate=[(5, 5), (-5, -5)],