"vscode:/vscode.git/clone" did not exist on "8918cce0b56bca7b0252ae9ba67745f5d35da73e"
Unverified commit 6155808f authored by vfdev, committed by GitHub

[proto] Improvements for functional API and tests (#6187)

* Added base tests for rotate_image_tensor

* Updated resize_image_tensor API and tests and fixed a bug with max_size

* Refactored and modified private api for resize functional op

* Fixed failures

* More updates

* Updated proto functional op: resize_image_*

* Added max_size arg to resize_bounding_box and updated basic tests

* Update functional.py

* Reverted fill/center order for rotate; other nits
parent aeafa912
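For context on the behavior this commit wires up: torchvision's resize treats a single-value `size` as a shorter-edge resize, and `max_size` then caps the longer edge of the result. The sketch below is an illustrative re-implementation of those documented semantics under that assumption; `compute_output_size_sketch` is a hypothetical name, not the private `_compute_output_size` helper itself.

from typing import List, Optional, Tuple

def compute_output_size_sketch(
    image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
) -> Tuple[int, int]:
    # Hypothetical helper mirroring torchvision's documented resize semantics.
    old_h, old_w = image_size
    if len(size) == 1:  # single value: resize the shorter edge, keep aspect ratio
        requested = size[0]
        short, long = min(old_h, old_w), max(old_h, old_w)
        new_short = requested
        new_long = int(requested * long / short)
        if max_size is not None:
            if max_size <= requested:
                raise ValueError("max_size must be strictly greater than the requested size")
            if new_long > max_size:  # cap the longer edge and rescale the shorter one
                new_short = int(max_size * new_short / new_long)
                new_long = max_size
        return (new_short, new_long) if old_h <= old_w else (new_long, new_short)
    # explicit (height, width): max_size is rejected in this case
    if max_size is not None:
        raise ValueError("max_size is only supported when size is a single value")
    return size[0], size[1]

print(compute_output_size_sketch((100, 200), size=[50], max_size=60))  # (30, 60)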
@@ -201,32 +201,58 @@ def horizontal_flip_bounding_box():
 @register_kernel_info_from_sample_inputs_fn
 def resize_image_tensor():
-    for image, interpolation in itertools.product(
+    for image, interpolation, max_size, antialias in itertools.product(
         make_images(),
-        [
-            F.InterpolationMode.BILINEAR,
-            F.InterpolationMode.NEAREST,
-        ],
+        [F.InterpolationMode.BILINEAR, F.InterpolationMode.NEAREST],  # interpolation
+        [None, 34],  # max_size
+        [False, True],  # antialias
     ):
+        if antialias and interpolation == F.InterpolationMode.NEAREST:
+            continue
         height, width = image.shape[-2:]
         for size in [
             (height, width),
             (int(height * 0.75), int(width * 1.25)),
         ]:
-            yield SampleInput(image, size=size, interpolation=interpolation)
+            if max_size is not None:
+                size = [size[0]]
+            yield SampleInput(image, size=size, interpolation=interpolation, max_size=max_size, antialias=antialias)
 
 
 @register_kernel_info_from_sample_inputs_fn
 def resize_bounding_box():
-    for bounding_box in make_bounding_boxes():
+    for bounding_box, max_size in itertools.product(
+        make_bounding_boxes(),
+        [None, 34],  # max_size
+    ):
         height, width = bounding_box.image_size
         for size in [
             (height, width),
             (int(height * 0.75), int(width * 1.25)),
         ]:
-            yield SampleInput(bounding_box, size=size, image_size=bounding_box.image_size)
+            if max_size is not None:
+                size = [size[0]]
+            yield SampleInput(bounding_box, size=size, image_size=bounding_box.image_size)
 
 
+@register_kernel_info_from_sample_inputs_fn
+def resize_segmentation_mask():
+    for mask, max_size in itertools.product(
+        make_segmentation_masks(),
+        [None, 34],  # max_size
+    ):
+        height, width = mask.shape[-2:]
+        for size in [
+            (height, width),
+            (int(height * 0.75), int(width * 1.25)),
+        ]:
+            if max_size is not None:
+                size = [size[0]]
+            yield SampleInput(mask, size=size, max_size=max_size)
+
+
 @register_kernel_info_from_sample_inputs_fn
 def affine_image_tensor():
     for image, angle, translate, scale, shear in itertools.product(
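A note on the test construction above: torchvision's resize only accepts `max_size` together with a single-value `size` (shorter-edge resize), which is why the sample generators collapse `size` to `[size[0]]` whenever `max_size` is set. A minimal reproduction of that pairing logic, with illustrative literal sizes:

import itertools

for size, max_size in itertools.product([(32, 32), (24, 40)], [None, 34]):
    if max_size is not None:
        size = [size[0]]  # max_size requires a single-value size
    print(size, max_size)
# (32, 32) None
# [32] 34
# (24, 40) None
# [24] 34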
@@ -284,6 +310,22 @@ def affine_segmentation_mask():
     )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def rotate_image_tensor():
+    for image, angle, expand, center, fill in itertools.product(
+        make_images(extra_dims=((), (4,))),
+        [-87, 15, 90],  # angle
+        [True, False],  # expand
+        [None, [12, 23]],  # center
+        [None, [128]],  # fill
+    ):
+        if center is not None and expand:
+            # Skip warning: The provided center argument is ignored if expand is True
+            continue
+        yield SampleInput(image, angle=angle, expand=expand, center=center, fill=fill)
+
+
 @register_kernel_info_from_sample_inputs_fn
 def rotate_bounding_box():
     for bounding_box, angle, expand, center in itertools.product(
...
@@ -6,7 +6,12 @@ import PIL.Image
 import torch
 from torchvision.prototype import features
 from torchvision.transforms import functional_tensor as _FT, functional_pil as _FP
-from torchvision.transforms.functional import pil_modes_mapping, _get_inverse_affine_matrix, InterpolationMode
+from torchvision.transforms.functional import (
+    pil_modes_mapping,
+    _get_inverse_affine_matrix,
+    InterpolationMode,
+    _compute_output_size,
+)
 
 from ._meta import convert_bounding_box_format, get_dimensions_image_tensor, get_dimensions_image_pil
@@ -42,14 +47,12 @@ def resize_image_tensor(
     max_size: Optional[int] = None,
     antialias: Optional[bool] = None,
 ) -> torch.Tensor:
-    # TODO: use _compute_output_size to enable max_size option
-    max_size  # ununsed right now
-    new_height, new_width = size
     num_channels, old_height, old_width = get_dimensions_image_tensor(image)
+    new_height, new_width = _compute_output_size((old_height, old_width), size=size, max_size=max_size)
     batch_shape = image.shape[:-3]
     return _FT.resize(
         image.reshape((-1, num_channels, old_height, old_width)),
-        size=size,
+        size=[new_height, new_width],
         interpolation=interpolation.value,
         antialias=antialias,
     ).reshape(batch_shape + (num_channels, new_height, new_width))
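The max_size bug referenced in the commit message is visible here: the old kernel unpacked `size` directly into `(new_height, new_width)` and left `max_size` unused, so single-value sizes and `max_size` were silently broken. A hedged usage sketch of the fixed kernel, assuming the prototype functional namespace at this commit:

import torch
from torchvision.prototype.transforms import functional as F

img = torch.rand(3, 100, 200)  # C x H x W
out = F.resize_image_tensor(img, size=[50], max_size=60)
# Resizing the shorter edge to 50 would make the longer edge 100;
# max_size=60 caps it, so the expected output is 30 x 60.
assert out.shape[-2:] == (30, 60)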
@@ -61,8 +64,11 @@ def resize_image_pil(
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     max_size: Optional[int] = None,
 ) -> PIL.Image.Image:
-    # TODO: use _compute_output_size to enable max_size option
-    max_size  # ununsed right now
+    if isinstance(size, int):
+        size = [size, size]
+    # Explicitly cast size to list otherwise mypy issue: incompatible type "Sequence[int]"; expected "List[int]"
+    size: List[int] = list(size)
+    size = _compute_output_size(img.size[::-1], size=size, max_size=max_size)
     return _FP.resize(img, size, interpolation=pil_modes_mapping[interpolation])
@@ -72,10 +78,11 @@ def resize_segmentation_mask(
     return resize_image_tensor(segmentation_mask, size=size, interpolation=InterpolationMode.NEAREST, max_size=max_size)
 
 
-# TODO: handle max_size
-def resize_bounding_box(bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int]) -> torch.Tensor:
+def resize_bounding_box(
+    bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int], max_size: Optional[int] = None
+) -> torch.Tensor:
     old_height, old_width = image_size
-    new_height, new_width = size
+    new_height, new_width = _compute_output_size(image_size, size=size, max_size=max_size)
     ratios = torch.tensor((new_width / old_width, new_height / old_height), device=bounding_box.device)
     return bounding_box.view(-1, 2, 2).mul(ratios).view(bounding_box.shape)
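The bounding-box kernel now derives the output size the same way, then scales coordinates by the width/height ratios: viewing a box as pairs of (x, y) points lets one elementwise multiply handle both corners. A worked example of just that scaling step, assuming XYXY-format boxes:

import torch

old_h, old_w = 100, 200
new_h, new_w = 30, 60  # e.g. the output size for size=[50], max_size=60
box = torch.tensor([10.0, 20.0, 50.0, 80.0])  # XYXY
ratios = torch.tensor((new_w / old_w, new_h / old_h))  # (0.3, 0.3)
scaled = box.view(-1, 2, 2).mul(ratios).view(box.shape)
# view(-1, 2, 2) -> [[10., 20.], [50., 80.]]; each (x, y) pair is scaled independently
# scaled == tensor([ 3.,  6., 15., 24.])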
...