"doc/git@developer.sourcefind.cn:OpenDAS/ktransformers.git" did not exist on "ad2c52d72a996732bdcd3f6bfe2afbdaa4a2b19e"
Unverified commit 50b77fa7, authored by Philip Meier, committed by GitHub

use non-random images for interpolation kernels for testing (#6977)

* use non-random images for interpolation kernels for testing

* use real image rather than artificial

* cleanup
parent 346f6dd9
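Why this helps: the reference tests compare torchvision's tensor kernels against PIL outputs, and random uint8 inputs make that comparison needlessly loose. Interpolation backends may legitimately pick slightly different source pixels (e.g. NEAREST coordinate rounding differs between the tensor and PIL kernels); on noise, a one-pixel disagreement lands on an unrelated value, while on a real photograph neighboring pixels are correlated and the same disagreement is tiny. Switching to a fixed photo is what lets the tolerances below drop (e.g. from a mean pixel difference of 110 down to 10 for resize). The following standalone sketch, which is not part of the patch, illustrates the effect:

```python
# Illustration only (not from this patch): the tensor and PIL backends of
# NEAREST resize can round sampling coordinates differently. On random noise
# the disagreeing pixels take arbitrary values; on smooth content they are
# near-identical.
import torch
import torchvision.transforms.functional as F
from torchvision.transforms import InterpolationMode


def mean_abs_diff_vs_pil(image: torch.Tensor) -> float:
    # Resize once with the tensor kernel and once through PIL, then compare.
    tensor_result = F.resize(image, [32, 32], interpolation=InterpolationMode.NEAREST)
    pil_result = F.pil_to_tensor(
        F.resize(F.to_pil_image(image), [32, 32], interpolation=InterpolationMode.NEAREST)
    )
    return (tensor_result.float() - pil_result.float()).abs().mean().item()


noise = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)
gradient = torch.linspace(0, 255, 64 * 64).reshape(1, 64, 64).repeat(3, 1, 1).to(torch.uint8)

print(mean_abs_diff_vs_pil(noise))     # large mean difference
print(mean_abs_diff_vs_pil(gradient))  # much smaller mean difference
```

The first file changed, prototype_common_utils, adds deterministic, JPEG-backed image loaders: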
```diff
@@ -3,6 +3,7 @@
 import collections.abc
 import dataclasses
 import functools
+import pathlib
 from collections import defaultdict
 from typing import Callable, Optional, Sequence, Tuple, Union
@@ -14,7 +15,7 @@ from datasets_utils import combinations_grid
 from torch.nn.functional import one_hot
 from torch.testing._comparison import assert_equal as _assert_equal, BooleanPair, NonePair, NumberPair, TensorLikePair
 from torchvision.prototype import features
-from torchvision.prototype.transforms.functional import to_image_tensor
+from torchvision.prototype.transforms.functional import convert_dtype_image_tensor, to_image_tensor
 from torchvision.transforms.functional_tensor import _max_value as get_max_value

 __all__ = [
@@ -69,6 +70,7 @@ class ImagePair(TensorLikePair):
         self._compare_attributes(actual, expected)
         actual, expected = self._equalize_attributes(actual, expected)
+        actual, expected = self._promote_for_comparison(actual, expected)

         abs_diff = torch.abs(actual - expected)

         if self.allowed_percentage_diff is not None:
@@ -313,6 +315,42 @@ def make_image_loaders(
 make_images = from_loaders(make_image_loaders)


+def make_image_loader_for_interpolation(size="random", *, color_space=features.ColorSpace.RGB, dtype=torch.uint8):
+    size = _parse_spatial_size(size)
+    num_channels = get_num_channels(color_space)
+
+    def fn(shape, dtype, device):
+        height, width = shape[-2:]
+        image_pil = (
+            PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")
+            .resize((width, height))
+            .convert(
+                {
+                    features.ColorSpace.GRAY: "L",
+                    features.ColorSpace.GRAY_ALPHA: "LA",
+                    features.ColorSpace.RGB: "RGB",
+                    features.ColorSpace.RGB_ALPHA: "RGBA",
+                }[color_space]
+            )
+        )
+
+        image_tensor = convert_dtype_image_tensor(to_image_tensor(image_pil).to(device=device), dtype=dtype)
+
+        return features.Image(image_tensor, color_space=color_space)
+
+    return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype, color_space=color_space)
+
+
+def make_image_loaders_for_interpolation(
+    sizes=((233, 147),),
+    color_spaces=(features.ColorSpace.RGB,),
+    dtypes=(torch.uint8,),
+):
+    for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes):
+        yield make_image_loader_for_interpolation(**params)
+
+
 @dataclasses.dataclass
 class BoundingBoxLoader(TensorLoader):
     format: features.BoundingBoxFormat
```
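Each loader defers image construction until a test materializes it, so every parametrization sees the same 233x147 Grace Hopper photo rather than fresh random data. A hedged usage sketch, assuming the `load(device)` method of this suite's `TensorLoader` protocol:

```python
# Assumes TensorLoader.load(device) dispatches to fn(shape, dtype, device), as
# this test suite's loaders do; treat the method name as an assumption.
import torch

loader = make_image_loader_for_interpolation(size=(233, 147))
first = loader.load("cpu")
second = loader.load("cpu")
# Unlike the random loaders, the JPEG-backed loader is reproducible:
assert torch.equal(first, second)
```

The second file changed, the kernel-info definitions, switches every interpolation-sensitive reference-input generator to the new loaders, tightens the closeness tolerances, and removes the PIL reference checks for the mask kernels: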
```diff
@@ -19,6 +19,7 @@ from prototype_common_utils import (
     make_bounding_box_loaders,
     make_image_loader,
     make_image_loaders,
+    make_image_loaders_for_interpolation,
     make_mask_loaders,
     make_video_loaders,
     mark_framework_limitation,
@@ -287,7 +288,7 @@ def reference_resize_image_tensor(*args, **kwargs):
 def reference_inputs_resize_image_tensor():
     for image_loader, interpolation in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
+        make_image_loaders_for_interpolation(),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.NEAREST_EXACT,
@@ -319,17 +320,6 @@ def sample_inputs_resize_mask():
         yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1])


-@pil_reference_wrapper
-def reference_resize_mask(*args, **kwargs):
-    return F.resize_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)
-
-
-def reference_inputs_resize_mask():
-    for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]):
-        for size in _get_resize_sizes(mask_loader.shape[-2:]):
-            yield ArgsKwargs(mask_loader, size=size)
-
-
 def sample_inputs_resize_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1])
@@ -369,11 +359,9 @@ KERNEL_INFOS.extend(
             reference_inputs_fn=reference_inputs_resize_image_tensor,
             float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(110, agg_method="mean"),
+                **pil_reference_pixel_difference(10, agg_method="mean"),
                 **cuda_vs_cpu_pixel_difference(),
-                # TODO: investigate
-                **float32_vs_uint8_pixel_difference(50),
+                **float32_vs_uint8_pixel_difference(1, agg_method="mean"),
             },
             test_marks=[
                 xfail_jit_python_scalar_arg("size"),
@@ -391,9 +379,6 @@ KERNEL_INFOS.extend(
         KernelInfo(
             F.resize_mask,
             sample_inputs_fn=sample_inputs_resize_mask,
-            reference_fn=reference_resize_mask,
-            reference_inputs_fn=reference_inputs_resize_mask,
-            float32_vs_uint8=True,
             closeness_kwargs=pil_reference_pixel_difference(10),
             test_marks=[
                 xfail_jit_python_scalar_arg("size"),
@@ -501,9 +486,7 @@ def sample_inputs_affine_image_tensor():
 def reference_inputs_affine_image_tensor():
-    for image_loader, affine_kwargs in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _AFFINE_KWARGS
-    ):
+    for image_loader, affine_kwargs in itertools.product(make_image_loaders_for_interpolation(), _AFFINE_KWARGS):
         yield ArgsKwargs(
             image_loader,
             interpolation=F.InterpolationMode.NEAREST,
@@ -617,18 +600,6 @@ def sample_inputs_affine_mask():
         yield ArgsKwargs(mask_loader, **_full_affine_params())


-@pil_reference_wrapper
-def reference_affine_mask(*args, **kwargs):
-    return F.affine_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)
-
-
-def reference_inputs_resize_mask():
-    for mask_loader, affine_kwargs in itertools.product(
-        make_mask_loaders(extra_dims=[()], num_objects=[1]), _AFFINE_KWARGS
-    ):
-        yield ArgsKwargs(mask_loader, **affine_kwargs)
-
-
 def sample_inputs_affine_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, **_full_affine_params())
@@ -665,10 +636,6 @@ KERNEL_INFOS.extend(
         KernelInfo(
             F.affine_mask,
             sample_inputs_fn=sample_inputs_affine_mask,
-            reference_fn=reference_affine_mask,
-            reference_inputs_fn=reference_inputs_resize_mask,
-            closeness_kwargs=pil_reference_pixel_difference(10),
-            float32_vs_uint8=True,
             test_marks=[
                 xfail_jit_python_scalar_arg("shear"),
             ],
@@ -870,9 +837,7 @@ def sample_inputs_rotate_image_tensor():
 def reference_inputs_rotate_image_tensor():
-    for image_loader, angle in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _ROTATE_ANGLES
-    ):
+    for image_loader, angle in itertools.product(make_image_loaders_for_interpolation(), _ROTATE_ANGLES):
         yield ArgsKwargs(image_loader, angle=angle)
@@ -891,16 +856,6 @@ def sample_inputs_rotate_mask():
         yield ArgsKwargs(mask_loader, angle=15.0)


-@pil_reference_wrapper
-def reference_rotate_mask(*args, **kwargs):
-    return F.rotate_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs)
-
-
-def reference_inputs_rotate_mask():
-    for mask_loader, angle in itertools.product(make_mask_loaders(extra_dims=[()], num_objects=[1]), _ROTATE_ANGLES):
-        yield ArgsKwargs(mask_loader, angle=angle)
-
-
 def sample_inputs_rotate_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, angle=15.0)
@@ -914,8 +869,7 @@ KERNEL_INFOS.extend(
             reference_fn=pil_reference_wrapper(F.rotate_image_pil),
             reference_inputs_fn=reference_inputs_rotate_image_tensor,
             float32_vs_uint8=True,
-            # TODO: investigate
-            closeness_kwargs=pil_reference_pixel_difference(110, agg_method="mean"),
+            closeness_kwargs=pil_reference_pixel_difference(1, agg_method="mean"),
             test_marks=[
                 xfail_jit_tuple_instead_of_list("fill"),
                 # TODO: check if this is a regression since it seems that should be supported if `int` is ok
@@ -929,10 +883,6 @@ KERNEL_INFOS.extend(
         KernelInfo(
             F.rotate_mask,
             sample_inputs_fn=sample_inputs_rotate_mask,
-            reference_fn=reference_rotate_mask,
-            reference_inputs_fn=reference_inputs_rotate_mask,
-            float32_vs_uint8=True,
-            closeness_kwargs=pil_reference_pixel_difference(10),
         ),
         KernelInfo(
             F.rotate_video,
@@ -1058,7 +1008,7 @@ def reference_resized_crop_image_tensor(*args, **kwargs):
 def reference_inputs_resized_crop_image_tensor():
     for image_loader, interpolation, params in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
+        make_image_loaders_for_interpolation(),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.NEAREST_EXACT,
@@ -1089,13 +1039,6 @@ def sample_inputs_resized_crop_mask():
         yield ArgsKwargs(mask_loader, **_RESIZED_CROP_PARAMS[0])


-def reference_inputs_resized_crop_mask():
-    for mask_loader, params in itertools.product(
-        make_mask_loaders(extra_dims=[()], num_objects=[1]), _RESIZED_CROP_PARAMS
-    ):
-        yield ArgsKwargs(mask_loader, **params)
-
-
 def sample_inputs_resized_crop_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0])
@@ -1110,11 +1053,9 @@ KERNEL_INFOS.extend(
             reference_inputs_fn=reference_inputs_resized_crop_image_tensor,
             float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(60, agg_method="mean"),
                 **cuda_vs_cpu_pixel_difference(),
-                # TODO: investigate
-                **float32_vs_uint8_pixel_difference(50),
+                **pil_reference_pixel_difference(3, agg_method="mean"),
+                **float32_vs_uint8_pixel_difference(3, agg_method="mean"),
             },
         ),
         KernelInfo(
@@ -1124,10 +1065,6 @@ KERNEL_INFOS.extend(
         KernelInfo(
             F.resized_crop_mask,
             sample_inputs_fn=sample_inputs_resized_crop_mask,
-            reference_fn=pil_reference_wrapper(F.resized_crop_image_pil),
-            reference_inputs_fn=reference_inputs_resized_crop_mask,
-            float32_vs_uint8=True,
-            closeness_kwargs=pil_reference_pixel_difference(10),
         ),
         KernelInfo(
             F.resized_crop_video,
@@ -1298,12 +1235,24 @@ def sample_inputs_perspective_image_tensor():
 def reference_inputs_perspective_image_tensor():
-    for image_loader, coefficients in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _PERSPECTIVE_COEFFS
+    for image_loader, coefficients, interpolation in itertools.product(
+        make_image_loaders_for_interpolation(),
+        _PERSPECTIVE_COEFFS,
+        [
+            F.InterpolationMode.NEAREST,
+            F.InterpolationMode.BILINEAR,
+        ],
     ):
         # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it?
         for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype):
-            yield ArgsKwargs(image_loader, None, None, fill=fill, coefficients=coefficients)
+            yield ArgsKwargs(
+                image_loader,
+                startpoints=None,
+                endpoints=None,
+                interpolation=interpolation,
+                fill=fill,
+                coefficients=coefficients,
+            )


 def sample_inputs_perspective_bounding_box():
@@ -1339,8 +1288,7 @@ KERNEL_INFOS.extend(
             reference_inputs_fn=reference_inputs_perspective_image_tensor,
             float32_vs_uint8=float32_vs_uint8_fill_adapter,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(160, agg_method="mean"),
+                **pil_reference_pixel_difference(2, agg_method="mean"),
                 **cuda_vs_cpu_pixel_difference(),
                 **float32_vs_uint8_pixel_difference(),
             },
@@ -1381,7 +1329,7 @@ def sample_inputs_elastic_image_tensor():
 def reference_inputs_elastic_image_tensor():
     for image_loader, interpolation in itertools.product(
-        make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]),
+        make_image_loaders_for_interpolation(),
         [
             F.InterpolationMode.NEAREST,
             F.InterpolationMode.BILINEAR,
@@ -1409,12 +1357,6 @@ def sample_inputs_elastic_mask():
         yield ArgsKwargs(mask_loader, displacement=displacement)


-def reference_inputs_elastic_mask():
-    for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]):
-        displacement = _get_elastic_displacement(mask_loader.shape[-2:])
-        yield ArgsKwargs(mask_loader, displacement=displacement)
-
-
 def sample_inputs_elastic_video():
     for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]):
         displacement = _get_elastic_displacement(video_loader.shape[-2:])
@@ -1426,11 +1368,12 @@ KERNEL_INFOS.extend(
         KernelInfo(
             F.elastic_image_tensor,
             sample_inputs_fn=sample_inputs_elastic_image_tensor,
-            reference_fn=pil_reference_wrapper(F.elastic_image_pil),
             reference_inputs_fn=reference_inputs_elastic_image_tensor,
             float32_vs_uint8=float32_vs_uint8_fill_adapter,
-            # TODO: investigate
-            closeness_kwargs=float32_vs_uint8_pixel_difference(60, agg_method="mean"),
+            closeness_kwargs={
+                **float32_vs_uint8_pixel_difference(6, agg_method="mean"),
+                **cuda_vs_cpu_pixel_difference(),
+            },
         ),
         KernelInfo(
             F.elastic_bounding_box,
@@ -1439,15 +1382,11 @@ KERNEL_INFOS.extend(
         KernelInfo(
             F.elastic_mask,
             sample_inputs_fn=sample_inputs_elastic_mask,
-            reference_fn=pil_reference_wrapper(F.elastic_image_pil),
-            reference_inputs_fn=reference_inputs_elastic_mask,
-            float32_vs_uint8=True,
-            # TODO: investigate
-            closeness_kwargs=pil_reference_pixel_difference(80, agg_method="mean"),
         ),
         KernelInfo(
             F.elastic_video,
             sample_inputs_fn=sample_inputs_elastic_video,
+            closeness_kwargs=cuda_vs_cpu_pixel_difference(),
         ),
     ]
 )
@@ -2089,8 +2028,7 @@ KERNEL_INFOS.extend(
             reference_inputs_fn=reference_inputs_adjust_hue_image_tensor,
             float32_vs_uint8=True,
             closeness_kwargs={
-                # TODO: investigate
-                **pil_reference_pixel_difference(20),
+                **pil_reference_pixel_difference(2, agg_method="mean"),
                 **float32_vs_uint8_pixel_difference(),
             },
         ),
```
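A note on the tolerance values: with `agg_method="mean"`, the `*_pixel_difference` helpers are understood to bound the mean absolute per-pixel difference rather than the maximum, which is why small numbers like 1-10 are meaningful for uint8 images. A minimal sketch of that reading (illustrative only; the suite's actual check lives in its comparison classes):

```python
# Illustrative approximation of a mean-aggregated pixel-difference check.
import torch


def assert_mean_pixel_difference(actual: torch.Tensor, expected: torch.Tensor, tolerance: float) -> None:
    # Promote to float before subtracting so uint8 arithmetic cannot wrap around.
    mean_diff = (actual.float() - expected.float()).abs().mean().item()
    assert mean_diff <= tolerance, f"mean abs pixel difference {mean_diff:.2f} exceeds {tolerance}"
```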