"vscode:/vscode.git/clone" did not exist on "8918cce0b56bca7b0252ae9ba67745f5d35da73e"
Unverified commit 6155808f authored by vfdev, committed by GitHub

[proto] Improvements for functional API and tests (#6187)

* Added base tests for rotate_image_tensor

* Updated resize_image_tensor API and tests and fixed a bug with max_size

* Refactored and modified private api for resize functional op

* Fixed failures

* More updates

* Updated proto functional op: resize_image_*

* Added max_size arg to resize_bounding_box and updated basic tests

* Update functional.py

* Reverted fill/center order for rotate; other nits
parent aeafa912
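For context on the behavior this commit wires up: torchvision's resize treats a single-value `size` as a shorter-edge resize, and `max_size` then caps the longer edge of the result. The sketch below is an illustrative re-implementation of those documented semantics under that assumption; `compute_output_size_sketch` is a hypothetical name, not the private `_compute_output_size` helper itself.

from typing import List, Optional, Tuple

def compute_output_size_sketch(
    image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
) -> Tuple[int, int]:
    # Hypothetical helper mirroring torchvision's documented resize semantics.
    old_h, old_w = image_size
    if len(size) == 1:  # single value: resize the shorter edge, keep aspect ratio
        requested = size[0]
        short, long = min(old_h, old_w), max(old_h, old_w)
        new_short = requested
        new_long = int(requested * long / short)
        if max_size is not None:
            if max_size <= requested:
                raise ValueError("max_size must be strictly greater than the requested size")
            if new_long > max_size:  # cap the longer edge and rescale the shorter one
                new_short = int(max_size * new_short / new_long)
                new_long = max_size
        return (new_short, new_long) if old_h <= old_w else (new_long, new_short)
    # explicit (height, width): max_size is rejected in this case
    if max_size is not None:
        raise ValueError("max_size is only supported when size is a single value")
    return size[0], size[1]

print(compute_output_size_sketch((100, 200), size=[50], max_size=60))  # (30, 60)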
@@ -201,32 +201,58 @@ def horizontal_flip_bounding_box():
 @register_kernel_info_from_sample_inputs_fn
 def resize_image_tensor():
-    for image, interpolation in itertools.product(
+    for image, interpolation, max_size, antialias in itertools.product(
         make_images(),
-        [
-            F.InterpolationMode.BILINEAR,
-            F.InterpolationMode.NEAREST,
-        ],
+        [F.InterpolationMode.BILINEAR, F.InterpolationMode.NEAREST],  # interpolation
+        [None, 34],  # max_size
+        [False, True],  # antialias
     ):
+        if antialias and interpolation == F.InterpolationMode.NEAREST:
+            continue
         height, width = image.shape[-2:]
         for size in [
             (height, width),
             (int(height * 0.75), int(width * 1.25)),
         ]:
-            yield SampleInput(image, size=size, interpolation=interpolation)
+            if max_size is not None:
+                size = [size[0]]
+            yield SampleInput(image, size=size, interpolation=interpolation, max_size=max_size, antialias=antialias)
 
 
 @register_kernel_info_from_sample_inputs_fn
 def resize_bounding_box():
-    for bounding_box in make_bounding_boxes():
+    for bounding_box, max_size in itertools.product(
+        make_bounding_boxes(),
+        [None, 34],  # max_size
+    ):
         height, width = bounding_box.image_size
         for size in [
             (height, width),
             (int(height * 0.75), int(width * 1.25)),
         ]:
-            yield SampleInput(bounding_box, size=size, image_size=bounding_box.image_size)
+            if max_size is not None:
+                size = [size[0]]
+            yield SampleInput(bounding_box, size=size, image_size=bounding_box.image_size)
 
 
+@register_kernel_info_from_sample_inputs_fn
+def resize_segmentation_mask():
+    for mask, max_size in itertools.product(
+        make_segmentation_masks(),
+        [None, 34],  # max_size
+    ):
+        height, width = mask.shape[-2:]
+        for size in [
+            (height, width),
+            (int(height * 0.75), int(width * 1.25)),
+        ]:
+            if max_size is not None:
+                size = [size[0]]
+            yield SampleInput(mask, size=size, max_size=max_size)
+
+
 @register_kernel_info_from_sample_inputs_fn
 def affine_image_tensor():
     for image, angle, translate, scale, shear in itertools.product(
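A note on the test construction above: torchvision's resize only accepts `max_size` together with a single-value `size` (shorter-edge resize), which is why the sample generators collapse `size` to `[size[0]]` whenever `max_size` is set. A minimal reproduction of that pairing logic, with illustrative literal sizes:

import itertools

for size, max_size in itertools.product([(32, 32), (24, 40)], [None, 34]):
    if max_size is not None:
        size = [size[0]]  # max_size requires a single-value size
    print(size, max_size)
# (32, 32) None
# [32] 34
# (24, 40) None
# [24] 34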
@@ -284,6 +310,22 @@ def affine_segmentation_mask():
     )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def rotate_image_tensor():
+    for image, angle, expand, center, fill in itertools.product(
+        make_images(extra_dims=((), (4,))),
+        [-87, 15, 90],  # angle
+        [True, False],  # expand
+        [None, [12, 23]],  # center
+        [None, [128]],  # fill
+    ):
+        if center is not None and expand:
+            # Skip warning: The provided center argument is ignored if expand is True
+            continue
+        yield SampleInput(image, angle=angle, expand=expand, center=center, fill=fill)
+
+
 @register_kernel_info_from_sample_inputs_fn
 def rotate_bounding_box():
     for bounding_box, angle, expand, center in itertools.product(
...
@@ -6,7 +6,12 @@ import PIL.Image
 import torch
 from torchvision.prototype import features
 from torchvision.transforms import functional_tensor as _FT, functional_pil as _FP
-from torchvision.transforms.functional import pil_modes_mapping, _get_inverse_affine_matrix, InterpolationMode
+from torchvision.transforms.functional import (
+    pil_modes_mapping,
+    _get_inverse_affine_matrix,
+    InterpolationMode,
+    _compute_output_size,
+)
 
 from ._meta import convert_bounding_box_format, get_dimensions_image_tensor, get_dimensions_image_pil
@@ -42,14 +47,12 @@ def resize_image_tensor(
     max_size: Optional[int] = None,
     antialias: Optional[bool] = None,
 ) -> torch.Tensor:
-    # TODO: use _compute_output_size to enable max_size option
-    max_size  # ununsed right now
-    new_height, new_width = size
     num_channels, old_height, old_width = get_dimensions_image_tensor(image)
+    new_height, new_width = _compute_output_size((old_height, old_width), size=size, max_size=max_size)
     batch_shape = image.shape[:-3]
     return _FT.resize(
         image.reshape((-1, num_channels, old_height, old_width)),
-        size=size,
+        size=[new_height, new_width],
         interpolation=interpolation.value,
         antialias=antialias,
     ).reshape(batch_shape + (num_channels, new_height, new_width))
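The max_size bug referenced in the commit message is visible here: the old kernel unpacked `size` directly into `(new_height, new_width)` and left `max_size` unused, so single-value sizes and `max_size` were silently broken. A hedged usage sketch of the fixed kernel, assuming the prototype functional namespace at this commit:

import torch
from torchvision.prototype.transforms import functional as F

img = torch.rand(3, 100, 200)  # C x H x W
out = F.resize_image_tensor(img, size=[50], max_size=60)
# Resizing the shorter edge to 50 would make the longer edge 100;
# max_size=60 caps it, so the expected output is 30 x 60.
assert out.shape[-2:] == (30, 60)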
@@ -61,8 +64,11 @@ def resize_image_pil(
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     max_size: Optional[int] = None,
 ) -> PIL.Image.Image:
-    # TODO: use _compute_output_size to enable max_size option
-    max_size  # ununsed right now
+    if isinstance(size, int):
+        size = [size, size]
+    # Explicitly cast size to list otherwise mypy issue: incompatible type "Sequence[int]"; expected "List[int]"
+    size: List[int] = list(size)
+    size = _compute_output_size(img.size[::-1], size=size, max_size=max_size)
     return _FP.resize(img, size, interpolation=pil_modes_mapping[interpolation])
@@ -72,10 +78,11 @@ def resize_segmentation_mask(
     return resize_image_tensor(segmentation_mask, size=size, interpolation=InterpolationMode.NEAREST, max_size=max_size)
 
 
-# TODO: handle max_size
-def resize_bounding_box(bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int]) -> torch.Tensor:
+def resize_bounding_box(
+    bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int], max_size: Optional[int] = None
+) -> torch.Tensor:
     old_height, old_width = image_size
-    new_height, new_width = size
+    new_height, new_width = _compute_output_size(image_size, size=size, max_size=max_size)
     ratios = torch.tensor((new_width / old_width, new_height / old_height), device=bounding_box.device)
     return bounding_box.view(-1, 2, 2).mul(ratios).view(bounding_box.shape)
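The bounding-box kernel now derives the output size the same way, then scales coordinates by the width/height ratios: viewing a box as pairs of (x, y) points lets one elementwise multiply handle both corners. A worked example of just that scaling step, assuming XYXY-format boxes:

import torch

old_h, old_w = 100, 200
new_h, new_w = 30, 60  # e.g. the output size for size=[50], max_size=60
box = torch.tensor([10.0, 20.0, 50.0, 80.0])  # XYXY
ratios = torch.tensor((new_w / old_w, new_h / old_h))  # (0.3, 0.3)
scaled = box.view(-1, 2, 2).mul(ratios).view(box.shape)
# view(-1, 2, 2) -> [[10., 20.], [50., 80.]]; each (x, y) pair is scaled independently
# scaled == tensor([ 3.,  6., 15., 24.])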
...