Unverified commit 4774fe3a authored by Philip Meier, committed by GitHub

Split tests for transforms v2 and prototype (#7278)

parent ac1512b6
@@ -6,4 +6,17 @@ eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env
python -m torch.utils.collect_env
pytest --junitxml=test-results/junit.xml -v --durations 20
case "$(uname -s)" in
Darwin*)
# The largest macOS runner is not able to handle the regular test suite plus the transforms v2 tests at the same
# time due to insufficient resources. Thus, we ignore the transforms v2 tests at first and run them in a separate
# step afterwards.
GLOB='test/test_transforms_v2*'
pytest --junitxml=test-results/junit.xml -v --durations 20 --ignore-glob="${GLOB}"
eval "pytest --junitxml=test-results/junit-transforms-v2.xml -v --durations 20 ${GLOB}"
;;
*)
pytest --junitxml=test-results/junit.xml -v --durations 20
;;
esac
@@ -18,7 +18,8 @@ from collections import Counter, defaultdict
import numpy as np
import pytest
import torch
from datasets_utils import combinations_grid, create_image_file, create_image_folder, make_tar, make_zip
from common_utils import combinations_grid
from datasets_utils import create_image_file, create_image_folder, make_tar, make_zip
from torch.nn.functional import one_hot
from torch.testing import make_tensor as _make_tensor
from torchvision.prototype import datasets
......
import collections.abc
import contextlib
import dataclasses
import enum
import functools
import itertools
import os
import pathlib
import random
import shutil
import tempfile
from collections import defaultdict
from typing import Callable, Sequence, Tuple, Union
import numpy as np
import PIL.Image
import pytest
import torch
import torch.testing
from PIL import Image
from torchvision import io
import __main__ # noqa: 401
from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
from torchvision import datapoints, io
from torchvision.transforms._functional_tensor import _max_value as get_max_value
from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_tensor
IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
@@ -137,9 +150,6 @@ def _create_data_batch(height=3, width=3, channels=3, num_samples=4, device="cpu
return batch_tensor
assert_equal = functools.partial(torch.testing.assert_close, rtol=0, atol=0)
def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
names = []
for i in range(num_videos):
@@ -160,6 +170,7 @@ def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
def _assert_equal_tensor_to_pil(tensor, pil_image, msg=None):
# FIXME: this is handled automatically by `assert_equal` below. Let's remove this in favor of it
np_pil_image = np.array(pil_image)
if np_pil_image.ndim == 2:
np_pil_image = np_pil_image[:, :, None]
@@ -172,6 +183,7 @@ def _assert_equal_tensor_to_pil(tensor, pil_image, msg=None):
def _assert_approx_equal_tensor_to_pil(
tensor, pil_image, tol=1e-5, msg=None, agg_method="mean", allowed_percentage_diff=None
):
# FIXME: this is handled automatically by `assert_close` below. Let's remove this in favor of it
# TODO: we could just merge this into _assert_equal_tensor_to_pil
np_pil_image = np.array(pil_image)
if np_pil_image.ndim == 2:
@@ -237,3 +249,592 @@ def cache(fn):
return out
return wrapper
def combinations_grid(**kwargs):
"""Creates a grid of input combinations.
Each element in the returned sequence is a dictionary containing one possible combination as values.
Example:
>>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham"))
[
{'foo': 'bar', 'spam': 'eggs'},
{'foo': 'bar', 'spam': 'ham'},
{'foo': 'baz', 'spam': 'eggs'},
{'foo': 'baz', 'spam': 'ham'}
]
"""
return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())]
class ImagePair(TensorLikePair):
def __init__(
self,
actual,
expected,
*,
mae=False,
**other_parameters,
):
if all(isinstance(input, PIL.Image.Image) for input in [actual, expected]):
actual, expected = [to_image_tensor(input) for input in [actual, expected]]
super().__init__(actual, expected, **other_parameters)
self.mae = mae
def compare(self) -> None:
actual, expected = self.actual, self.expected
self._compare_attributes(actual, expected)
actual, expected = self._equalize_attributes(actual, expected)
if self.mae:
actual, expected = self._promote_for_comparison(actual, expected)
mae = float(torch.abs(actual - expected).float().mean())
if mae > self.atol:
self._fail(
AssertionError,
f"The MAE of the images is {mae}, but only {self.atol} is allowed.",
)
else:
super()._compare_values(actual, expected)
def assert_close(
actual,
expected,
*,
allow_subclasses=True,
rtol=None,
atol=None,
equal_nan=False,
check_device=True,
check_dtype=True,
check_layout=True,
check_stride=False,
msg=None,
**kwargs,
):
"""Superset of :func:`torch.testing.assert_close` with support for PIL vs. tensor image comparison"""
__tracebackhide__ = True
error_metas = not_close_error_metas(
actual,
expected,
pair_types=(
NonePair,
BooleanPair,
NumberPair,
ImagePair,
TensorLikePair,
),
allow_subclasses=allow_subclasses,
rtol=rtol,
atol=atol,
equal_nan=equal_nan,
check_device=check_device,
check_dtype=check_dtype,
check_layout=check_layout,
check_stride=check_stride,
**kwargs,
)
if error_metas:
raise error_metas[0].to_error(msg)
assert_equal = functools.partial(assert_close, rtol=0, atol=0)
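# Illustrative sketch (not part of the commit): how the `assert_close` / `assert_equal` helpers above can be
# used to compare a PIL image against its tensor counterpart via the `ImagePair` machinery. The concrete
# values below are made up.
#
#     pil_image = PIL.Image.new("RGB", (4, 4), color=(10, 20, 30))
#     tensor_image = to_image_tensor(pil_image)
#     assert_equal(tensor_image, pil_image)  # the PIL input is converted to a tensor before comparison
#     assert_close(tensor_image + 1, pil_image, mae=True, atol=1.0)  # mean-absolute-error comparison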
def parametrized_error_message(*args, **kwargs):
def to_str(obj):
if isinstance(obj, torch.Tensor) and obj.numel() > 10:
return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})"
elif isinstance(obj, enum.Enum):
return f"{type(obj).__name__}.{obj.name}"
else:
return repr(obj)
if args or kwargs:
postfix = "\n".join(
[
"",
"Failure happened for the following parameters:",
"",
*[to_str(arg) for arg in args],
*[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()],
]
)
else:
postfix = ""
def wrapper(msg):
return msg + postfix
return wrapper
class ArgsKwargs:
def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
def __iter__(self):
yield self.args
yield self.kwargs
def load(self, device="cpu"):
return ArgsKwargs(
*(arg.load(device) if isinstance(arg, TensorLoader) else arg for arg in self.args),
**{
keyword: arg.load(device) if isinstance(arg, TensorLoader) else arg
for keyword, arg in self.kwargs.items()
},
)
DEFAULT_SQUARE_SPATIAL_SIZE = 15
DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33)
DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9)
DEFAULT_SPATIAL_SIZES = (
DEFAULT_LANDSCAPE_SPATIAL_SIZE,
DEFAULT_PORTRAIT_SPATIAL_SIZE,
DEFAULT_SQUARE_SPATIAL_SIZE,
"random",
)
def _parse_spatial_size(size, *, name="size"):
if size == "random":
return tuple(torch.randint(15, 33, (2,)).tolist())
elif isinstance(size, int) and size > 0:
return (size, size)
elif (
isinstance(size, collections.abc.Sequence)
and len(size) == 2
and all(isinstance(length, int) and length > 0 for length in size)
):
return tuple(size)
else:
raise pytest.UsageError(
f"'{name}' can either be `'random'`, a positive integer, or a sequence of two positive integers,"
f"but got {size} instead."
)
VALID_EXTRA_DIMS = ((), (4,), (2, 3))
DEGENERATE_BATCH_DIMS = ((0,), (5, 0), (0, 5))
DEFAULT_EXTRA_DIMS = (*VALID_EXTRA_DIMS, *DEGENERATE_BATCH_DIMS)
def from_loader(loader_fn):
def wrapper(*args, **kwargs):
device = kwargs.pop("device", "cpu")
loader = loader_fn(*args, **kwargs)
return loader.load(device)
return wrapper
def from_loaders(loaders_fn):
def wrapper(*args, **kwargs):
device = kwargs.pop("device", "cpu")
loaders = loaders_fn(*args, **kwargs)
for loader in loaders:
yield loader.load(device)
return wrapper
@dataclasses.dataclass
class TensorLoader:
fn: Callable[[Sequence[int], torch.dtype, Union[str, torch.device]], torch.Tensor]
shape: Sequence[int]
dtype: torch.dtype
def load(self, device):
return self.fn(self.shape, self.dtype, device)
@dataclasses.dataclass
class ImageLoader(TensorLoader):
spatial_size: Tuple[int, int] = dataclasses.field(init=False)
num_channels: int = dataclasses.field(init=False)
def __post_init__(self):
self.spatial_size = self.shape[-2:]
self.num_channels = self.shape[-3]
NUM_CHANNELS_MAP = {
"GRAY": 1,
"GRAY_ALPHA": 2,
"RGB": 3,
"RGBA": 4,
}
def get_num_channels(color_space):
num_channels = NUM_CHANNELS_MAP.get(color_space)
if not num_channels:
raise pytest.UsageError(f"Can't determine the number of channels for color space {color_space}")
return num_channels
def make_image_loader(
size="random",
*,
color_space="RGB",
extra_dims=(),
dtype=torch.float32,
constant_alpha=True,
):
size = _parse_spatial_size(size)
num_channels = get_num_channels(color_space)
def fn(shape, dtype, device):
max_value = get_max_value(dtype)
data = torch.testing.make_tensor(shape, low=0, high=max_value, dtype=dtype, device=device)
if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha:
data[..., -1, :, :] = max_value
return datapoints.Image(data)
return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype)
make_image = from_loader(make_image_loader)
def make_image_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
color_spaces=(
"GRAY",
"GRAY_ALPHA",
"RGB",
"RGBA",
),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.float32, torch.float64, torch.uint8),
constant_alpha=True,
):
for params in combinations_grid(size=sizes, color_space=color_spaces, extra_dims=extra_dims, dtype=dtypes):
yield make_image_loader(**params, constant_alpha=constant_alpha)
make_images = from_loaders(make_image_loaders)
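# Illustrative sketch (not part of the commit): `from_loader` / `from_loaders` turn the loader factories above
# into helpers that build the actual datapoints. Sizes and parameters below are made up.
#
#     image = make_image(size=(32, 32), color_space="RGB", dtype=torch.uint8)  # datapoints.Image, shape (3, 32, 32)
#     for image in make_images(sizes=((8, 8),), color_spaces=("GRAY",), extra_dims=((),), dtypes=(torch.uint8,)):
#         assert isinstance(image, datapoints.Image)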
def make_image_loader_for_interpolation(size="random", *, color_space="RGB", dtype=torch.uint8):
size = _parse_spatial_size(size)
num_channels = get_num_channels(color_space)
def fn(shape, dtype, device):
height, width = shape[-2:]
image_pil = (
PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")
.resize((width, height))
.convert(
{
"GRAY": "L",
"GRAY_ALPHA": "LA",
"RGB": "RGB",
"RGBA": "RGBA",
}[color_space]
)
)
image_tensor = convert_dtype_image_tensor(to_image_tensor(image_pil).to(device=device), dtype=dtype)
return datapoints.Image(image_tensor)
return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype)
def make_image_loaders_for_interpolation(
sizes=((233, 147),),
color_spaces=("RGB",),
dtypes=(torch.uint8,),
):
for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes):
yield make_image_loader_for_interpolation(**params)
@dataclasses.dataclass
class BoundingBoxLoader(TensorLoader):
format: datapoints.BoundingBoxFormat
spatial_size: Tuple[int, int]
def randint_with_tensor_bounds(arg1, arg2=None, **kwargs):
low, high = torch.broadcast_tensors(
*[torch.as_tensor(arg) for arg in ((0, arg1) if arg2 is None else (arg1, arg2))]
)
return torch.stack(
[
torch.randint(low_scalar, high_scalar, (), **kwargs)
for low_scalar, high_scalar in zip(low.flatten().tolist(), high.flatten().tolist())
]
).reshape(low.shape)
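# Illustrative sketch (not part of the commit): `randint_with_tensor_bounds` draws one integer per element with
# element-wise [low, high) bounds given by (broadcastable) tensors. The bounds below are made up.
#
#     low = torch.tensor([0, 2])
#     high = torch.tensor([3, 5])
#     sample = randint_with_tensor_bounds(low, high)  # shape (2,), with low[i] <= sample[i] < high[i]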
def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32):
if isinstance(format, str):
format = datapoints.BoundingBoxFormat[format]
if format not in {
datapoints.BoundingBoxFormat.XYXY,
datapoints.BoundingBoxFormat.XYWH,
datapoints.BoundingBoxFormat.CXCYWH,
}:
raise pytest.UsageError(f"Can't make bounding box in format {format}")
spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")
def fn(shape, dtype, device):
*extra_dims, num_coordinates = shape
if num_coordinates != 4:
raise pytest.UsageError()
if any(dim == 0 for dim in extra_dims):
return datapoints.BoundingBox(
torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
)
height, width = spatial_size
if format == datapoints.BoundingBoxFormat.XYXY:
x1 = torch.randint(0, width // 2, extra_dims)
y1 = torch.randint(0, height // 2, extra_dims)
x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1
y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1
parts = (x1, y1, x2, y2)
elif format == datapoints.BoundingBoxFormat.XYWH:
x = torch.randint(0, width // 2, extra_dims)
y = torch.randint(0, height // 2, extra_dims)
w = randint_with_tensor_bounds(1, width - x)
h = randint_with_tensor_bounds(1, height - y)
parts = (x, y, w, h)
else:  # format == datapoints.BoundingBoxFormat.CXCYWH
cx = torch.randint(1, width - 1, extra_dims)
cy = torch.randint(1, height - 1, extra_dims)
w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1)
h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1)
parts = (cx, cy, w, h)
return datapoints.BoundingBox(
torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
)
return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)
make_bounding_box = from_loader(make_bounding_box_loader)
def make_bounding_box_loaders(
*,
extra_dims=DEFAULT_EXTRA_DIMS,
formats=tuple(datapoints.BoundingBoxFormat),
spatial_size="random",
dtypes=(torch.float32, torch.float64, torch.int64),
):
for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes):
yield make_bounding_box_loader(**params, spatial_size=spatial_size)
make_bounding_boxes = from_loaders(make_bounding_box_loaders)
class MaskLoader(TensorLoader):
pass
def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8):
# This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects
size = _parse_spatial_size(size)
num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects
def fn(shape, dtype, device):
data = torch.testing.make_tensor(shape, low=0, high=2, dtype=dtype, device=device)
return datapoints.Mask(data)
return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype)
make_detection_mask = from_loader(make_detection_mask_loader)
def make_detection_mask_loaders(
sizes=DEFAULT_SPATIAL_SIZES,
num_objects=(1, 0, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8,),
):
for params in combinations_grid(size=sizes, num_objects=num_objects, extra_dims=extra_dims, dtype=dtypes):
yield make_detection_mask_loader(**params)
make_detection_masks = from_loaders(make_detection_mask_loaders)
def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8):
# This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values
size = _parse_spatial_size(size)
num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories
def fn(shape, dtype, device):
data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=dtype, device=device)
return datapoints.Mask(data)
return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype)
make_segmentation_mask = from_loader(make_segmentation_mask_loader)
def make_segmentation_mask_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
num_categories=(1, 2, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8,),
):
for params in combinations_grid(size=sizes, num_categories=num_categories, extra_dims=extra_dims, dtype=dtypes):
yield make_segmentation_mask_loader(**params)
make_segmentation_masks = from_loaders(make_segmentation_mask_loaders)
def make_mask_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
num_objects=(1, 0, "random"),
num_categories=(1, 2, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8,),
):
yield from make_detection_mask_loaders(sizes=sizes, num_objects=num_objects, extra_dims=extra_dims, dtypes=dtypes)
yield from make_segmentation_mask_loaders(
sizes=sizes, num_categories=num_categories, extra_dims=extra_dims, dtypes=dtypes
)
make_masks = from_loaders(make_mask_loaders)
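# Illustrative sketch (not part of the commit): the shape difference between the two mask flavors produced above.
# Sizes and counts are made up.
#
#     detection = make_detection_mask(size=(4, 4), num_objects=3)          # datapoints.Mask, shape (3, 4, 4), values in {0, 1}
#     segmentation = make_segmentation_mask(size=(4, 4), num_categories=5)  # datapoints.Mask, shape (4, 4), values in [0, 5)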
class VideoLoader(ImageLoader):
pass
def make_video_loader(
size="random",
*,
color_space="RGB",
num_frames="random",
extra_dims=(),
dtype=torch.uint8,
):
size = _parse_spatial_size(size)
num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames
def fn(shape, dtype, device):
video = make_image(size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device)
return datapoints.Video(video)
return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype)
make_video = from_loader(make_video_loader)
def make_video_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
color_spaces=(
"GRAY",
"RGB",
),
num_frames=(1, 0, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8, torch.float32, torch.float64),
):
for params in combinations_grid(
size=sizes, color_space=color_spaces, num_frames=num_frames, extra_dims=extra_dims, dtype=dtypes
):
yield make_video_loader(**params)
make_videos = from_loaders(make_video_loaders)
class TestMark:
def __init__(
self,
# Tuple of test class name and test function name that identifies the test the mark is applied to. If there is
# no test class, i.e. a standalone test function, use `None`.
test_id,
# `pytest.mark.*` to apply, e.g. `pytest.mark.skip` or `pytest.mark.xfail`
mark,
*,
# Callable that will be passed an `ArgsKwargs` and should return a boolean indicating whether the mark
# should be applied. If omitted, the mark is always applied.
condition=None,
):
self.test_id = test_id
self.mark = mark
self.condition = condition or (lambda args_kwargs: True)
def mark_framework_limitation(test_id, reason, condition=None):
# The purpose of this function is to have a single entry point for skip marks that exist only because the test
# framework cannot handle the kernel in general or a specific parameter combination.
# As development progresses, we can change the `mark.skip` to `mark.xfail` from time to time to see if the skip is
# still justified.
# We don't want to use `mark.xfail` all the time, because that actually runs the test until an error happens. Thus,
# we would be wasting CI resources for no reason most of the time.
return TestMark(test_id, pytest.mark.skip(reason=reason), condition=condition)
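# Illustrative sketch (not part of the commit): a hypothetical mark that skips a single parametrization of a test
# because the reference framework cannot handle it. The test id, reason, and condition below are made up.
#
#     mark = mark_framework_limitation(
#         ("TestKernels", "test_against_reference"),
#         "Reference implementation does not support degenerate bounding boxes",
#         condition=lambda args_kwargs: args_kwargs.kwargs.get("format") == "XYWH",
#     )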
class InfoBase:
def __init__(
self,
*,
# Identifier of the info that shows up in the parametrization.
id,
# Test markers that will be (conditionally) applied to an `ArgsKwargs` parametrization.
# See the `TestMark` class for details
test_marks=None,
# Additional parameters, e.g. `rtol=1e-3`, passed to `assert_close`. Keys are a 3-tuple of `test_id` (see
# `TestMark`), the dtype, and the device.
closeness_kwargs=None,
):
self.id = id
self.test_marks = test_marks or []
test_marks_map = defaultdict(list)
for test_mark in self.test_marks:
test_marks_map[test_mark.test_id].append(test_mark)
self._test_marks_map = dict(test_marks_map)
self.closeness_kwargs = closeness_kwargs or dict()
def get_marks(self, test_id, args_kwargs):
return [
test_mark.mark for test_mark in self._test_marks_map.get(test_id, []) if test_mark.condition(args_kwargs)
]
def get_closeness_kwargs(self, test_id, *, dtype, device):
if not (isinstance(test_id, tuple) and len(test_id) == 2):
msg = "`test_id` should be a `Tuple[Optional[str], str]` denoting the test class and function name"
if callable(test_id):
msg += ". Did you forget to add the `test_id` fixture to parameters of the test?"
else:
msg += f", but got {test_id} instead."
raise pytest.UsageError(msg)
if isinstance(device, torch.device):
device = device.type
return self.closeness_kwargs.get((test_id, dtype, device), dict())
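# Illustrative sketch (not part of the commit): how the `closeness_kwargs` lookup above is keyed by
# (test_id, dtype, device). The test id and tolerances below are made up.
#
#     info = InfoBase(
#         id="resize",
#         closeness_kwargs={(("TestKernels", "test_against_reference"), torch.uint8, "cpu"): dict(atol=1, rtol=0)},
#     )
#     info.get_closeness_kwargs(("TestKernels", "test_against_reference"), dtype=torch.uint8, device="cpu")
#     # -> {'atol': 1, 'rtol': 0}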
@@ -170,23 +170,6 @@ def test_all_configs(test):
return wrapper
def combinations_grid(**kwargs):
"""Creates a grid of input combinations.
Each element in the returned sequence is a dictionary containing one possible combination as values.
Example:
>>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham"))
[
{'foo': 'bar', 'spam': 'eggs'},
{'foo': 'bar', 'spam': 'ham'},
{'foo': 'baz', 'spam': 'eggs'},
{'foo': 'baz', 'spam': 'ham'}
]
"""
return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())]
class DatasetTestCase(unittest.TestCase):
"""Abstract base class for all dataset testcases.
......
"""This module is separated from common_utils.py to prevent the former to be dependent on torchvision.prototype"""
import collections.abc
import dataclasses
import enum
import functools
import pathlib
from collections import defaultdict
from typing import Callable, Optional, Sequence, Tuple, Union
from typing import Optional, Sequence
import PIL.Image
import pytest
import torch
import torch.testing
import torchvision.prototype.datapoints as proto_datapoints
from datasets_utils import combinations_grid
from torch.nn.functional import one_hot
from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
from torchvision import datapoints
from torchvision.transforms._functional_tensor import _max_value as get_max_value
from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_tensor
__all__ = [
"assert_close",
"assert_equal",
"ArgsKwargs",
"VALID_EXTRA_DIMS",
"make_image_loaders",
"make_image",
"make_images",
"make_bounding_box_loaders",
"make_bounding_box",
"make_bounding_boxes",
"make_label",
"make_one_hot_labels",
"make_detection_mask_loaders",
"make_detection_mask",
"make_detection_masks",
"make_segmentation_mask_loaders",
"make_segmentation_mask",
"make_segmentation_masks",
"make_mask_loaders",
"make_masks",
"make_video",
"make_videos",
"TestMark",
"mark_framework_limitation",
"InfoBase",
]
class ImagePair(TensorLikePair):
def __init__(
self,
actual,
expected,
*,
mae=False,
**other_parameters,
):
if all(isinstance(input, PIL.Image.Image) for input in [actual, expected]):
actual, expected = [to_image_tensor(input) for input in [actual, expected]]
super().__init__(actual, expected, **other_parameters)
self.mae = mae
def compare(self) -> None:
actual, expected = self.actual, self.expected
self._compare_attributes(actual, expected)
actual, expected = self._equalize_attributes(actual, expected)
if self.mae:
actual, expected = self._promote_for_comparison(actual, expected)
mae = float(torch.abs(actual - expected).float().mean())
if mae > self.atol:
self._fail(
AssertionError,
f"The MAE of the images is {mae}, but only {self.atol} is allowed.",
)
else:
super()._compare_values(actual, expected)
def assert_close(
actual,
expected,
*,
allow_subclasses=True,
rtol=None,
atol=None,
equal_nan=False,
check_device=True,
check_dtype=True,
check_layout=True,
check_stride=False,
msg=None,
**kwargs,
):
"""Superset of :func:`torch.testing.assert_close` with support for PIL vs. tensor image comparison"""
__tracebackhide__ = True
error_metas = not_close_error_metas(
actual,
expected,
pair_types=(
NonePair,
BooleanPair,
NumberPair,
ImagePair,
TensorLikePair,
),
allow_subclasses=allow_subclasses,
rtol=rtol,
atol=atol,
equal_nan=equal_nan,
check_device=check_device,
check_dtype=check_dtype,
check_layout=check_layout,
check_stride=check_stride,
**kwargs,
)
if error_metas:
raise error_metas[0].to_error(msg)
assert_equal = functools.partial(assert_close, rtol=0, atol=0)
def parametrized_error_message(*args, **kwargs):
def to_str(obj):
if isinstance(obj, torch.Tensor) and obj.numel() > 10:
return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})"
elif isinstance(obj, enum.Enum):
return f"{type(obj).__name__}.{obj.name}"
else:
return repr(obj)
if args or kwargs:
postfix = "\n".join(
[
"",
"Failure happened for the following parameters:",
"",
*[to_str(arg) for arg in args],
*[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()],
]
)
else:
postfix = ""
def wrapper(msg):
return msg + postfix
return wrapper
class ArgsKwargs:
def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
def __iter__(self):
yield self.args
yield self.kwargs
def load(self, device="cpu"):
return ArgsKwargs(
*(arg.load(device) if isinstance(arg, TensorLoader) else arg for arg in self.args),
**{
keyword: arg.load(device) if isinstance(arg, TensorLoader) else arg
for keyword, arg in self.kwargs.items()
},
)
DEFAULT_SQUARE_SPATIAL_SIZE = 15
DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33)
DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9)
DEFAULT_SPATIAL_SIZES = (
DEFAULT_LANDSCAPE_SPATIAL_SIZE,
DEFAULT_PORTRAIT_SPATIAL_SIZE,
DEFAULT_SQUARE_SPATIAL_SIZE,
"random",
)
def _parse_spatial_size(size, *, name="size"):
if size == "random":
return tuple(torch.randint(15, 33, (2,)).tolist())
elif isinstance(size, int) and size > 0:
return (size, size)
elif (
isinstance(size, collections.abc.Sequence)
and len(size) == 2
and all(isinstance(length, int) and length > 0 for length in size)
):
return tuple(size)
else:
raise pytest.UsageError(
f"'{name}' can either be `'random'`, a positive integer, or a sequence of two positive integers,"
f"but got {size} instead."
)
VALID_EXTRA_DIMS = ((), (4,), (2, 3))
DEGENERATE_BATCH_DIMS = ((0,), (5, 0), (0, 5))
DEFAULT_EXTRA_DIMS = (*VALID_EXTRA_DIMS, *DEGENERATE_BATCH_DIMS)
def from_loader(loader_fn):
def wrapper(*args, **kwargs):
device = kwargs.pop("device", "cpu")
loader = loader_fn(*args, **kwargs)
return loader.load(device)
return wrapper
def from_loaders(loaders_fn):
def wrapper(*args, **kwargs):
device = kwargs.pop("device", "cpu")
loaders = loaders_fn(*args, **kwargs)
for loader in loaders:
yield loader.load(device)
return wrapper
@dataclasses.dataclass
class TensorLoader:
fn: Callable[[Sequence[int], torch.dtype, Union[str, torch.device]], torch.Tensor]
shape: Sequence[int]
dtype: torch.dtype
def load(self, device):
return self.fn(self.shape, self.dtype, device)
@dataclasses.dataclass
class ImageLoader(TensorLoader):
spatial_size: Tuple[int, int] = dataclasses.field(init=False)
num_channels: int = dataclasses.field(init=False)
def __post_init__(self):
self.spatial_size = self.shape[-2:]
self.num_channels = self.shape[-3]
NUM_CHANNELS_MAP = {
"GRAY": 1,
"GRAY_ALPHA": 2,
"RGB": 3,
"RGBA": 4,
}
def get_num_channels(color_space):
num_channels = NUM_CHANNELS_MAP.get(color_space)
if not num_channels:
raise pytest.UsageError(f"Can't determine the number of channels for color space {color_space}")
return num_channels
def make_image_loader(
size="random",
*,
color_space="RGB",
extra_dims=(),
dtype=torch.float32,
constant_alpha=True,
):
size = _parse_spatial_size(size)
num_channels = get_num_channels(color_space)
def fn(shape, dtype, device):
max_value = get_max_value(dtype)
data = torch.testing.make_tensor(shape, low=0, high=max_value, dtype=dtype, device=device)
if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha:
data[..., -1, :, :] = max_value
return datapoints.Image(data)
return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype)
make_image = from_loader(make_image_loader)
def make_image_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
color_spaces=(
"GRAY",
"GRAY_ALPHA",
"RGB",
"RGBA",
),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.float32, torch.float64, torch.uint8),
constant_alpha=True,
):
for params in combinations_grid(size=sizes, color_space=color_spaces, extra_dims=extra_dims, dtype=dtypes):
yield make_image_loader(**params, constant_alpha=constant_alpha)
make_images = from_loaders(make_image_loaders)
def make_image_loader_for_interpolation(size="random", *, color_space="RGB", dtype=torch.uint8):
size = _parse_spatial_size(size)
num_channels = get_num_channels(color_space)
def fn(shape, dtype, device):
height, width = shape[-2:]
image_pil = (
PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")
.resize((width, height))
.convert(
{
"GRAY": "L",
"GRAY_ALPHA": "LA",
"RGB": "RGB",
"RGBA": "RGBA",
}[color_space]
)
)
image_tensor = convert_dtype_image_tensor(to_image_tensor(image_pil).to(device=device), dtype=dtype)
return datapoints.Image(image_tensor)
return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype)
def make_image_loaders_for_interpolation(
sizes=((233, 147),),
color_spaces=("RGB",),
dtypes=(torch.uint8,),
):
for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes):
yield make_image_loader_for_interpolation(**params)
@dataclasses.dataclass
class BoundingBoxLoader(TensorLoader):
format: datapoints.BoundingBoxFormat
spatial_size: Tuple[int, int]
def randint_with_tensor_bounds(arg1, arg2=None, **kwargs):
low, high = torch.broadcast_tensors(
*[torch.as_tensor(arg) for arg in ((0, arg1) if arg2 is None else (arg1, arg2))]
)
return torch.stack(
[
torch.randint(low_scalar, high_scalar, (), **kwargs)
for low_scalar, high_scalar in zip(low.flatten().tolist(), high.flatten().tolist())
]
).reshape(low.shape)
def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32):
if isinstance(format, str):
format = datapoints.BoundingBoxFormat[format]
if format not in {
datapoints.BoundingBoxFormat.XYXY,
datapoints.BoundingBoxFormat.XYWH,
datapoints.BoundingBoxFormat.CXCYWH,
}:
raise pytest.UsageError(f"Can't make bounding box in format {format}")
spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")
def fn(shape, dtype, device):
*extra_dims, num_coordinates = shape
if num_coordinates != 4:
raise pytest.UsageError()
if any(dim == 0 for dim in extra_dims):
return datapoints.BoundingBox(
torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
)
height, width = spatial_size
if format == datapoints.BoundingBoxFormat.XYXY:
x1 = torch.randint(0, width // 2, extra_dims)
y1 = torch.randint(0, height // 2, extra_dims)
x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1
y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1
parts = (x1, y1, x2, y2)
elif format == datapoints.BoundingBoxFormat.XYWH:
x = torch.randint(0, width // 2, extra_dims)
y = torch.randint(0, height // 2, extra_dims)
w = randint_with_tensor_bounds(1, width - x)
h = randint_with_tensor_bounds(1, height - y)
parts = (x, y, w, h)
else:  # format == datapoints.BoundingBoxFormat.CXCYWH
cx = torch.randint(1, width - 1, extra_dims)
cy = torch.randint(1, height - 1, extra_dims)
w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1)
h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1)
parts = (cx, cy, w, h)
return datapoints.BoundingBox(
torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
)
return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)
make_bounding_box = from_loader(make_bounding_box_loader)
def make_bounding_box_loaders(
*,
extra_dims=DEFAULT_EXTRA_DIMS,
formats=tuple(datapoints.BoundingBoxFormat),
spatial_size="random",
dtypes=(torch.float32, torch.float64, torch.int64),
):
for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes):
yield make_bounding_box_loader(**params, spatial_size=spatial_size)
from common_utils import combinations_grid, DEFAULT_EXTRA_DIMS, from_loader, from_loaders, TensorLoader
from torch.nn.functional import one_hot
make_bounding_boxes = from_loaders(make_bounding_box_loaders)
from torchvision.prototype import datapoints
@dataclasses.dataclass
@@ -458,7 +40,7 @@ def make_label_loader(*, extra_dims=(), categories=None, dtype=torch.int64):
# The idiom `make_tensor(..., dtype=torch.int64).to(dtype)` is intentional to only get integer values,
# regardless of the requested dtype, e.g. 0 or 0.0 rather than 0 or 0.123
data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=torch.int64, device=device).to(dtype)
return proto_datapoints.Label(data, categories=categories)
return datapoints.Label(data, categories=categories)
return LabelLoader(fn, shape=extra_dims, dtype=dtype, categories=categories)
@@ -482,7 +64,7 @@ def make_one_hot_label_loader(*, categories=None, extra_dims=(), dtype=torch.int
# since `one_hot` only supports int64
label = make_label_loader(extra_dims=extra_dims, categories=num_categories, dtype=torch.int64).load(device)
data = one_hot(label, num_classes=num_categories).to(dtype)
return proto_datapoints.OneHotLabel(data, categories=categories)
return datapoints.OneHotLabel(data, categories=categories)
return OneHotLabelLoader(fn, shape=(*extra_dims, num_categories), dtype=dtype, categories=categories)
@@ -498,195 +80,3 @@ def make_one_hot_label_loaders(
make_one_hot_labels = from_loaders(make_one_hot_label_loaders)
class MaskLoader(TensorLoader):
pass
def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8):
# This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects
size = _parse_spatial_size(size)
num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects
def fn(shape, dtype, device):
data = torch.testing.make_tensor(shape, low=0, high=2, dtype=dtype, device=device)
return datapoints.Mask(data)
return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype)
make_detection_mask = from_loader(make_detection_mask_loader)
def make_detection_mask_loaders(
sizes=DEFAULT_SPATIAL_SIZES,
num_objects=(1, 0, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8,),
):
for params in combinations_grid(size=sizes, num_objects=num_objects, extra_dims=extra_dims, dtype=dtypes):
yield make_detection_mask_loader(**params)
make_detection_masks = from_loaders(make_detection_mask_loaders)
def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8):
# This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values
size = _parse_spatial_size(size)
num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories
def fn(shape, dtype, device):
data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=dtype, device=device)
return datapoints.Mask(data)
return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype)
make_segmentation_mask = from_loader(make_segmentation_mask_loader)
def make_segmentation_mask_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
num_categories=(1, 2, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8,),
):
for params in combinations_grid(size=sizes, num_categories=num_categories, extra_dims=extra_dims, dtype=dtypes):
yield make_segmentation_mask_loader(**params)
make_segmentation_masks = from_loaders(make_segmentation_mask_loaders)
def make_mask_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
num_objects=(1, 0, "random"),
num_categories=(1, 2, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8,),
):
yield from make_detection_mask_loaders(sizes=sizes, num_objects=num_objects, extra_dims=extra_dims, dtypes=dtypes)
yield from make_segmentation_mask_loaders(
sizes=sizes, num_categories=num_categories, extra_dims=extra_dims, dtypes=dtypes
)
make_masks = from_loaders(make_mask_loaders)
class VideoLoader(ImageLoader):
pass
def make_video_loader(
size="random",
*,
color_space="RGB",
num_frames="random",
extra_dims=(),
dtype=torch.uint8,
):
size = _parse_spatial_size(size)
num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames
def fn(shape, dtype, device):
video = make_image(size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device)
return datapoints.Video(video)
return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype)
make_video = from_loader(make_video_loader)
def make_video_loaders(
*,
sizes=DEFAULT_SPATIAL_SIZES,
color_spaces=(
"GRAY",
"RGB",
),
num_frames=(1, 0, "random"),
extra_dims=DEFAULT_EXTRA_DIMS,
dtypes=(torch.uint8, torch.float32, torch.float64),
):
for params in combinations_grid(
size=sizes, color_space=color_spaces, num_frames=num_frames, extra_dims=extra_dims, dtype=dtypes
):
yield make_video_loader(**params)
make_videos = from_loaders(make_video_loaders)
class TestMark:
def __init__(
self,
# Tuple of test class name and test function name that identifies the test the mark is applied to. If there is
# no test class, i.e. a standalone test function, use `None`.
test_id,
# `pytest.mark.*` to apply, e.g. `pytest.mark.skip` or `pytest.mark.xfail`
mark,
*,
# Callable that will be passed an `ArgsKwargs` and should return a boolean indicating whether the mark
# should be applied. If omitted, the mark is always applied.
condition=None,
):
self.test_id = test_id
self.mark = mark
self.condition = condition or (lambda args_kwargs: True)
def mark_framework_limitation(test_id, reason, condition=None):
# The purpose of this function is to have a single entry point for skip marks that exist only because the test
# framework cannot handle the kernel in general or a specific parameter combination.
# As development progresses, we can change the `mark.skip` to `mark.xfail` from time to time to see if the skip is
# still justified.
# We don't want to use `mark.xfail` all the time, because that actually runs the test until an error happens. Thus,
# we would be wasting CI resources for no reason most of the time.
return TestMark(test_id, pytest.mark.skip(reason=reason), condition=condition)
class InfoBase:
def __init__(
self,
*,
# Identifier of the info that shows up in the parametrization.
id,
# Test markers that will be (conditionally) applied to an `ArgsKwargs` parametrization.
# See the `TestMark` class for details
test_marks=None,
# Additional parameters, e.g. `rtol=1e-3`, passed to `assert_close`. Keys are a 3-tuple of `test_id` (see
# `TestMark`), the dtype, and the device.
closeness_kwargs=None,
):
self.id = id
self.test_marks = test_marks or []
test_marks_map = defaultdict(list)
for test_mark in self.test_marks:
test_marks_map[test_mark.test_id].append(test_mark)
self._test_marks_map = dict(test_marks_map)
self.closeness_kwargs = closeness_kwargs or dict()
def get_marks(self, test_id, args_kwargs):
return [
test_mark.mark for test_mark in self._test_marks_map.get(test_id, []) if test_mark.condition(args_kwargs)
]
def get_closeness_kwargs(self, test_id, *, dtype, device):
if not (isinstance(test_id, tuple) and len(test_id) == 2):
msg = "`test_id` should be a `Tuple[Optional[str], str]` denoting the test class and function name"
if callable(test_id):
msg += ". Did you forget to add the `test_id` fixture to parameters of the test?"
else:
msg += f", but got {test_id} instead."
raise pytest.UsageError(msg)
if isinstance(device, torch.device):
device = device.type
return self.closeness_kwargs.get((test_id, dtype, device), dict())
import pytest
import torch
from PIL import Image
from torchvision import datapoints
@pytest.mark.parametrize("data", [torch.rand(3, 32, 32), Image.new("RGB", (32, 32), color=123)])
def test_image_instance(data):
image = datapoints.Image(data)
assert isinstance(image, torch.Tensor)
assert image.ndim == 3 and image.shape[0] == 3
@pytest.mark.parametrize("data", [torch.randint(0, 10, size=(1, 32, 32)), Image.new("L", (32, 32), color=2)])
def test_mask_instance(data):
mask = datapoints.Mask(data)
assert isinstance(mask, torch.Tensor)
assert mask.ndim == 3 and mask.shape[0] == 1
@pytest.mark.parametrize("data", [torch.randint(0, 32, size=(5, 4)), [[0, 0, 5, 5], [2, 2, 7, 7]]])
@pytest.mark.parametrize(
"format", ["XYXY", "CXCYWH", datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH]
)
def test_bbox_instance(data, format):
bboxes = datapoints.BoundingBox(data, format=format, spatial_size=(32, 32))
assert isinstance(bboxes, torch.Tensor)
assert bboxes.ndim == 2 and bboxes.shape[1] == 4
if isinstance(format, str):
format = datapoints.BoundingBoxFormat.from_str(format.upper())
assert bboxes.format == format
@@ -22,12 +22,13 @@ import PIL
import pytest
import torch
import torch.nn.functional as F
from common_utils import combinations_grid
from torchvision import datasets
class STL10TestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.STL10
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled"))
@staticmethod
def _make_binary_file(num_elements, root, name):
@@ -113,9 +114,7 @@ class Caltech101TestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Caltech101
FEATURE_TYPES = (PIL.Image.Image, (int, np.ndarray, tuple))
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
target_type=("category", "annotation", ["category", "annotation"])
)
ADDITIONAL_CONFIGS = combinations_grid(target_type=("category", "annotation", ["category", "annotation"]))
REQUIRED_PACKAGES = ("scipy",)
def inject_fake_data(self, tmpdir, config):
@@ -208,7 +207,7 @@ class Caltech256TestCase(datasets_utils.ImageDatasetTestCase):
class WIDERFaceTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.WIDERFace
FEATURE_TYPES = (PIL.Image.Image, (dict, type(None))) # test split returns None as target
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
def inject_fake_data(self, tmpdir, config):
widerface_dir = pathlib.Path(tmpdir) / "widerface"
@@ -269,8 +268,8 @@ class CityScapesTestCase(datasets_utils.ImageDatasetTestCase):
"color",
)
ADDITIONAL_CONFIGS = (
*datasets_utils.combinations_grid(mode=("fine",), split=("train", "test", "val"), target_type=TARGET_TYPES),
*datasets_utils.combinations_grid(
*combinations_grid(mode=("fine",), split=("train", "test", "val"), target_type=TARGET_TYPES),
*combinations_grid(
mode=("coarse",),
split=("train", "train_extra", "val"),
target_type=TARGET_TYPES,
@@ -387,7 +386,7 @@ class CityScapesTestCase(datasets_utils.ImageDatasetTestCase):
class ImageNetTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.ImageNet
REQUIRED_PACKAGES = ("scipy",)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val"))
def inject_fake_data(self, tmpdir, config):
tmpdir = pathlib.Path(tmpdir)
@@ -417,7 +416,7 @@ class ImageNetTestCase(datasets_utils.ImageDatasetTestCase):
class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.CIFAR10
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False))
ADDITIONAL_CONFIGS = combinations_grid(train=(True, False))
_VERSION_CONFIG = dict(
base_folder="cifar-10-batches-py",
@@ -490,7 +489,7 @@ class CelebATestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.CelebA
FEATURE_TYPES = (PIL.Image.Image, (torch.Tensor, int, tuple, type(None)))
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
split=("train", "valid", "test", "all"),
target_type=("attr", "identity", "bbox", "landmarks", ["attr", "identity"]),
)
@@ -614,9 +613,7 @@ class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase):
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image)
ADDITIONAL_CONFIGS = (
*datasets_utils.combinations_grid(
year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval")
),
*combinations_grid(year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval")),
dict(year="2007", image_set="test"),
)
@@ -791,7 +788,7 @@ class CocoCaptionsTestCase(CocoDetectionTestCase):
def _create_annotations(self, image_ids, num_annotations_per_image):
captions = [str(idx) for idx in range(num_annotations_per_image)]
annotations = datasets_utils.combinations_grid(image_id=image_ids, caption=captions)
annotations = combinations_grid(image_id=image_ids, caption=captions)
for id, annotation in enumerate(annotations):
annotation["id"] = id
return annotations, dict(captions=captions)
@@ -805,7 +802,7 @@ class CocoCaptionsTestCase(CocoDetectionTestCase):
class UCF101TestCase(datasets_utils.VideoDatasetTestCase):
DATASET_CLASS = datasets.UCF101
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False))
ADDITIONAL_CONFIGS = combinations_grid(fold=(1, 2, 3), train=(True, False))
_VIDEO_FOLDER = "videos"
_ANNOTATIONS_FOLDER = "annotations"
@@ -866,9 +863,7 @@ class LSUNTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.LSUN
REQUIRED_PACKAGES = ("lmdb",)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"])
)
ADDITIONAL_CONFIGS = combinations_grid(classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"]))
_CATEGORIES = (
"bedroom",
@@ -953,7 +948,7 @@ class LSUNTestCase(datasets_utils.ImageDatasetTestCase):
class KineticsTestCase(datasets_utils.VideoDatasetTestCase):
DATASET_CLASS = datasets.Kinetics
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"), num_classes=("400", "600", "700"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val"), num_classes=("400", "600", "700"))
def inject_fake_data(self, tmpdir, config):
classes = ("Abseiling", "Zumba")
@@ -973,7 +968,7 @@ class KineticsTestCase(datasets_utils.VideoDatasetTestCase):
class HMDB51TestCase(datasets_utils.VideoDatasetTestCase):
DATASET_CLASS = datasets.HMDB51
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False))
ADDITIONAL_CONFIGS = combinations_grid(fold=(1, 2, 3), train=(True, False))
_VIDEO_FOLDER = "videos"
_SPLITS_FOLDER = "splits"
@@ -1033,7 +1028,7 @@ class HMDB51TestCase(datasets_utils.VideoDatasetTestCase):
class OmniglotTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Omniglot
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(background=(True, False))
ADDITIONAL_CONFIGS = combinations_grid(background=(True, False))
def inject_fake_data(self, tmpdir, config):
target_folder = (
@@ -1113,7 +1108,7 @@ class SEMEIONTestCase(datasets_utils.ImageDatasetTestCase):
class USPSTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.USPS
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False))
ADDITIONAL_CONFIGS = combinations_grid(train=(True, False))
def inject_fake_data(self, tmpdir, config):
num_images = 2 if config["train"] else 1
@@ -1135,7 +1130,7 @@ class SBDatasetTestCase(datasets_utils.ImageDatasetTestCase):
REQUIRED_PACKAGES = ("scipy.io", "scipy.sparse")
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
image_set=("train", "val", "train_noval"), mode=("boundaries", "segmentation")
)
@@ -1221,7 +1216,7 @@ class PhotoTourTestCase(datasets_utils.ImageDatasetTestCase):
_TRAIN_FEATURE_TYPES = (torch.Tensor,)
_TEST_FEATURE_TYPES = (torch.Tensor, torch.Tensor, torch.Tensor)
datasets_utils.combinations_grid(train=(True, False))
combinations_grid(train=(True, False))
_NAME = "liberty"
@@ -1380,7 +1375,7 @@ class Flickr30kTestCase(Flickr8kTestCase):
class MNISTTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.MNIST
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False))
ADDITIONAL_CONFIGS = combinations_grid(train=(True, False))
_MAGIC_DTYPES = {
torch.uint8: 8,
@@ -1450,7 +1445,7 @@ class EMNISTTestCase(MNISTTestCase):
DATASET_CLASS = datasets.EMNIST
DEFAULT_CONFIG = dict(split="byclass")
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
split=("byclass", "bymerge", "balanced", "letters", "digits", "mnist"), train=(True, False)
)
@@ -1461,7 +1456,7 @@ class EMNISTTestCase(MNISTTestCase):
class QMNISTTestCase(MNISTTestCase):
DATASET_CLASS = datasets.QMNIST
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(what=("train", "test", "test10k", "nist"))
ADDITIONAL_CONFIGS = combinations_grid(what=("train", "test", "test10k", "nist"))
_LABELS_SIZE = (8,)
_LABELS_DTYPE = torch.int32
@@ -1507,7 +1502,7 @@ class MovingMNISTTestCase(datasets_utils.DatasetTestCase):
DATASET_CLASS = datasets.MovingMNIST
FEATURE_TYPES = (torch.Tensor,)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=(None, "train", "test"), split_ratio=(10, 1, 19))
ADDITIONAL_CONFIGS = combinations_grid(split=(None, "train", "test"), split_ratio=(10, 1, 19))
def inject_fake_data(self, tmpdir, config):
base_folder = os.path.join(tmpdir, self.DATASET_CLASS.__name__)
@@ -1543,7 +1538,7 @@ class DatasetFolderTestCase(datasets_utils.ImageDatasetTestCase):
# We only iterate over different 'extensions' here and handle the tests for 'is_valid_file' in the
# 'test_is_valid_file()' method.
DEFAULT_CONFIG = dict(extensions=_EXTENSIONS)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(extensions=[(ext,) for ext in _EXTENSIONS])
ADDITIONAL_CONFIGS = combinations_grid(extensions=[(ext,) for ext in _EXTENSIONS])
def dataset_args(self, tmpdir, config):
return tmpdir, datasets.folder.pil_loader
@@ -1612,7 +1607,7 @@ class ImageFolderTestCase(datasets_utils.ImageDatasetTestCase):
class KittiTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Kitti
FEATURE_TYPES = (PIL.Image.Image, (list, type(None))) # test split returns None as target
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False))
ADDITIONAL_CONFIGS = combinations_grid(train=(True, False))
def inject_fake_data(self, tmpdir, config):
kitti_dir = os.path.join(tmpdir, "Kitti", "raw")
@@ -1648,7 +1643,7 @@ class KittiTestCase(datasets_utils.ImageDatasetTestCase):
class SvhnTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.SVHN
REQUIRED_PACKAGES = ("scipy",)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "extra"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test", "extra"))
def inject_fake_data(self, tmpdir, config):
import scipy.io as sio
@@ -1669,7 +1664,7 @@ class SvhnTestCase(datasets_utils.ImageDatasetTestCase):
class Places365TestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Places365
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
split=("train-standard", "train-challenge", "val"),
small=(False, True),
)
@@ -1761,7 +1756,7 @@ class INaturalistTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.INaturalist
FEATURE_TYPES = (PIL.Image.Image, (int, tuple))
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
target_type=("kingdom", "full", "genus", ["kingdom", "phylum", "class", "order", "family", "genus", "full"]),
version=("2021_train",),
)
@@ -1798,7 +1793,7 @@ class INaturalistTestCase(datasets_utils.ImageDatasetTestCase):
class LFWPeopleTestCase(datasets_utils.DatasetTestCase):
DATASET_CLASS = datasets.LFWPeople
FEATURE_TYPES = (PIL.Image.Image, int)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
split=("10fold", "train", "test"), image_set=("original", "funneled", "deepfunneled")
)
_IMAGES_DIR = {"original": "lfw", "funneled": "lfw_funneled", "deepfunneled": "lfw-deepfunneled"}
@@ -1874,7 +1869,7 @@ class LFWPairsTestCase(LFWPeopleTestCase):
class SintelTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Sintel
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"), pass_name=("clean", "final", "both"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"), pass_name=("clean", "final", "both"))
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))
FLOW_H, FLOW_W = 3, 4
@@ -1942,7 +1937,7 @@ class SintelTestCase(datasets_utils.ImageDatasetTestCase):
class KittiFlowTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.KittiFlow
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
def inject_fake_data(self, tmpdir, config):
@@ -2002,7 +1997,7 @@ class KittiFlowTestCase(datasets_utils.ImageDatasetTestCase):
class FlyingChairsTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.FlyingChairs
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val"))
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))
FLOW_H, FLOW_W = 3, 4
@@ -2057,7 +2052,7 @@ class FlyingChairsTestCase(datasets_utils.ImageDatasetTestCase):
class FlyingThings3DTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.FlyingThings3D
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
split=("train", "test"), pass_name=("clean", "final", "both"), camera=("left", "right", "both")
)
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))
@@ -2194,7 +2189,7 @@ class Food101TestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Food101
FEATURE_TYPES = (PIL.Image.Image, int)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
def inject_fake_data(self, tmpdir: str, config):
root_folder = pathlib.Path(tmpdir) / "food-101"
@@ -2229,7 +2224,7 @@ class Food101TestCase(datasets_utils.ImageDatasetTestCase):
class FGVCAircraftTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.FGVCAircraft
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
split=("train", "val", "trainval", "test"), annotation_level=("variant", "family", "manufacturer")
)
@@ -2312,7 +2307,7 @@ class DTDTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.DTD
FEATURE_TYPES = (PIL.Image.Image, int)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
split=("train", "test", "val"),
# There is no need to test the whole matrix here, since each fold is treated exactly the same
partition=(1, 5, 10),
@@ -2346,7 +2341,7 @@ class DTDTestCase(datasets_utils.ImageDatasetTestCase):
class FER2013TestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.FER2013
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))
@@ -2381,7 +2376,7 @@ class GTSRBTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.GTSRB
FEATURE_TYPES = (PIL.Image.Image, int)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
def inject_fake_data(self, tmpdir: str, config):
root_folder = os.path.join(tmpdir, "gtsrb")
@@ -2431,7 +2426,7 @@ class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.CLEVRClassification
FEATURE_TYPES = (PIL.Image.Image, (int, type(None)))
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
def inject_fake_data(self, tmpdir, config):
data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0"
@@ -2463,7 +2458,7 @@ class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.OxfordIIITPet
FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None)))
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
split=("trainval", "test"),
target_types=("category", "segmentation", ["category", "segmentation"], []),
)
@@ -2522,7 +2517,7 @@ class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase):
class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.StanfordCars
REQUIRED_PACKAGES = ("scipy",)
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
def inject_fake_data(self, tmpdir, config):
import scipy.io as io
@@ -2566,7 +2561,7 @@ class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase):
class Country211TestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Country211
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "valid", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "valid", "test"))
def inject_fake_data(self, tmpdir: str, config):
split_folder = pathlib.Path(tmpdir) / "country211" / config["split"]
@@ -2593,7 +2588,7 @@ class Country211TestCase(datasets_utils.ImageDatasetTestCase):
class Flowers102TestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Flowers102
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
REQUIRED_PACKAGES = ("scipy",)
def inject_fake_data(self, tmpdir: str, config):
@@ -2629,7 +2624,7 @@ class Flowers102TestCase(datasets_utils.ImageDatasetTestCase):
class PCAMTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.PCAM
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
REQUIRED_PACKAGES = ("h5py",)
def inject_fake_data(self, tmpdir: str, config):
......@@ -2651,7 +2646,7 @@ class PCAMTestCase(datasets_utils.ImageDatasetTestCase):
class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.RenderedSST2
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test"))
SPLIT_TO_FOLDER = {"train": "train", "val": "valid", "test": "test"}
def inject_fake_data(self, tmpdir: str, config):
......@@ -2673,7 +2668,7 @@ class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase):
class Kitti2012StereoTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Kitti2012Stereo
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
def inject_fake_data(self, tmpdir, config):
......@@ -2735,7 +2730,7 @@ class Kitti2012StereoTestCase(datasets_utils.ImageDatasetTestCase):
class Kitti2015StereoTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Kitti2015Stereo
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
def inject_fake_data(self, tmpdir, config):
......@@ -2873,7 +2868,7 @@ class CREStereoTestCase(datasets_utils.ImageDatasetTestCase):
class FallingThingsStereoTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.FallingThingsStereo
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(variant=("single", "mixed", "both"))
ADDITIONAL_CONFIGS = combinations_grid(variant=("single", "mixed", "both"))
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))
@staticmethod
......@@ -2947,7 +2942,7 @@ class FallingThingsStereoTestCase(datasets_utils.ImageDatasetTestCase):
class SceneFlowStereoTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.SceneFlowStereo
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
variant=("FlyingThings3D", "Driving", "Monkaa"), pass_name=("clean", "final", "both")
)
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))
......@@ -3034,7 +3029,7 @@ class SceneFlowStereoTestCase(datasets_utils.ImageDatasetTestCase):
class InStereo2k(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.InStereo2k
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
@staticmethod
def _make_scene_folder(root: str, name: str, size: Tuple[int, int]):
......@@ -3076,7 +3071,7 @@ class InStereo2k(datasets_utils.ImageDatasetTestCase):
class SintelStereoTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.SintelStereo
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(pass_name=("final", "clean", "both"))
ADDITIONAL_CONFIGS = combinations_grid(pass_name=("final", "clean", "both"))
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
def inject_fake_data(self, tmpdir, config):
......@@ -3152,7 +3147,7 @@ class SintelStereoTestCase(datasets_utils.ImageDatasetTestCase):
class ETH3DStereoestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.ETH3DStereo
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"))
FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
@staticmethod
......@@ -3219,7 +3214,7 @@ class ETH3DStereoestCase(datasets_utils.ImageDatasetTestCase):
class Middlebury2014StereoTestCase(datasets_utils.ImageDatasetTestCase):
DATASET_CLASS = datasets.Middlebury2014Stereo
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(
ADDITIONAL_CONFIGS = combinations_grid(
split=("train", "additional"),
calibration=("perfect", "imperfect", "both"),
use_ambient_views=(True, False),
......
import pytest
import torch
from PIL import Image
from torchvision import datapoints
from torchvision.prototype import datapoints as proto_datapoints
......@@ -134,30 +131,3 @@ def test_wrap_like():
assert type(label_new) is proto_datapoints.Label
assert label_new.data_ptr() == output.data_ptr()
assert label_new.categories is label.categories
@pytest.mark.parametrize("data", [torch.rand(3, 32, 32), Image.new("RGB", (32, 32), color=123)])
def test_image_instance(data):
image = datapoints.Image(data)
assert isinstance(image, torch.Tensor)
assert image.ndim == 3 and image.shape[0] == 3
@pytest.mark.parametrize("data", [torch.randint(0, 10, size=(1, 32, 32)), Image.new("L", (32, 32), color=2)])
def test_mask_instance(data):
mask = datapoints.Mask(data)
assert isinstance(mask, torch.Tensor)
assert mask.ndim == 3 and mask.shape[0] == 1
@pytest.mark.parametrize("data", [torch.randint(0, 32, size=(5, 4)), [[0, 0, 5, 5], [2, 2, 7, 7]]])
@pytest.mark.parametrize(
"format", ["XYXY", "CXCYWH", datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH]
)
def test_bbox_instance(data, format):
bboxes = datapoints.BoundingBox(data, format=format, spatial_size=(32, 32))
assert isinstance(bboxes, torch.Tensor)
assert bboxes.ndim == 2 and bboxes.shape[1] == 4
if isinstance(format, str):
format = datapoints.BoundingBoxFormat.from_str(format.upper())
assert bboxes.format == format
import itertools
import pathlib
import random
import re
import warnings
from collections import defaultdict
import numpy as np
import re
import PIL.Image
import pytest
import torch
import torchvision.prototype.datapoints as proto_datapoints
import torchvision.prototype.transforms as proto_transforms
import torchvision.transforms.v2 as transforms
import torchvision.transforms.v2.utils
from common_utils import cpu_and_gpu
from prototype_common_utils import (
from common_utils import (
assert_equal,
DEFAULT_EXTRA_DIMS,
make_bounding_box,
make_bounding_boxes,
make_detection_mask,
make_image,
make_images,
make_label,
make_one_hot_labels,
make_segmentation_mask,
make_video,
make_videos,
)
from torch.utils._pytree import tree_flatten, tree_unflatten
from torchvision import datapoints
from torchvision.ops.boxes import box_iou
from torchvision.transforms.functional import InterpolationMode, pil_to_tensor, to_pil_image
from torchvision.transforms.v2 import functional as F
from torchvision.transforms.v2._utils import _convert_fill_arg
from torchvision.transforms.v2.utils import check_type, is_simple_tensor, query_chw
BATCH_EXTRA_DIMS = [extra_dims for extra_dims in DEFAULT_EXTRA_DIMS if extra_dims]
def make_vanilla_tensor_images(*args, **kwargs):
for image in make_images(*args, **kwargs):
if image.ndim > 3:
continue
yield image.data
def make_pil_images(*args, **kwargs):
for image in make_vanilla_tensor_images(*args, **kwargs):
yield to_pil_image(image)
from prototype_common_utils import make_label, make_one_hot_labels
from torchvision.datapoints import BoundingBox, BoundingBoxFormat, Image, Mask, Video
from torchvision.prototype import datapoints, transforms
from torchvision.transforms.v2._utils import _convert_fill_arg
from torchvision.transforms.v2.functional import InterpolationMode, pil_to_tensor, to_image_pil
from torchvision.transforms.v2.utils import check_type, is_simple_tensor
def make_vanilla_tensor_bounding_boxes(*args, **kwargs):
for bounding_box in make_bounding_boxes(*args, **kwargs):
yield bounding_box.data
BATCH_EXTRA_DIMS = [extra_dims for extra_dims in DEFAULT_EXTRA_DIMS if extra_dims]
def parametrize(transforms_with_inputs):
......@@ -73,1541 +44,47 @@ def parametrize(transforms_with_inputs):
)
def auto_augment_adapter(transform, input, device):
adapted_input = {}
image_or_video_found = False
for key, value in input.items():
if isinstance(value, (datapoints.BoundingBox, datapoints.Mask)):
# AA transforms don't support bounding boxes or masks
continue
elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor, PIL.Image.Image)):
if image_or_video_found:
# AA transforms only support a single image or video
continue
image_or_video_found = True
adapted_input[key] = value
return adapted_input
def linear_transformation_adapter(transform, input, device):
flat_inputs = list(input.values())
c, h, w = query_chw(
[
item
for item, needs_transform in zip(flat_inputs, transforms.Transform()._needs_transform_list(flat_inputs))
if needs_transform
]
)
num_elements = c * h * w
transform.transformation_matrix = torch.randn((num_elements, num_elements), device=device)
transform.mean_vector = torch.randn((num_elements,), device=device)
return {key: value for key, value in input.items() if not isinstance(value, PIL.Image.Image)}
def normalize_adapter(transform, input, device):
adapted_input = {}
for key, value in input.items():
if isinstance(value, PIL.Image.Image):
# normalize doesn't support PIL images
continue
elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor)):
# normalize doesn't support integer images
value = F.convert_dtype(value, torch.float32)
adapted_input[key] = value
return adapted_input
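# Hedged sketch, not part of this change: `check_type`, used by the adapters above, accepts
# a mix of plain types and predicate callables such as `is_simple_tensor`. A rough equivalent
# of its semantics (the actual implementation lives in torchvision.transforms.v2.utils):
def _check_type_sketch(obj, types_or_checks):
    return any(
        isinstance(obj, type_or_check) if isinstance(type_or_check, type) else type_or_check(obj)
        for type_or_check in types_or_checks
    )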
class TestSmoke:
@pytest.mark.parametrize(
("transform", "adapter"),
[
(transforms.RandomErasing(p=1.0), None),
(transforms.AugMix(), auto_augment_adapter),
(transforms.AutoAugment(), auto_augment_adapter),
(transforms.RandAugment(), auto_augment_adapter),
(transforms.TrivialAugmentWide(), auto_augment_adapter),
(transforms.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.3, hue=0.15), None),
(transforms.Grayscale(), None),
(transforms.RandomAdjustSharpness(sharpness_factor=0.5, p=1.0), None),
(transforms.RandomAutocontrast(p=1.0), None),
(transforms.RandomEqualize(p=1.0), None),
(transforms.RandomGrayscale(p=1.0), None),
(transforms.RandomInvert(p=1.0), None),
(transforms.RandomPhotometricDistort(p=1.0), None),
(transforms.RandomPosterize(bits=4, p=1.0), None),
(transforms.RandomSolarize(threshold=0.5, p=1.0), None),
(transforms.CenterCrop([16, 16]), None),
(transforms.ElasticTransform(sigma=1.0), None),
(transforms.Pad(4), None),
(transforms.RandomAffine(degrees=30.0), None),
(transforms.RandomCrop([16, 16], pad_if_needed=True), None),
(transforms.RandomHorizontalFlip(p=1.0), None),
(transforms.RandomPerspective(p=1.0), None),
(transforms.RandomResize(min_size=10, max_size=20), None),
(transforms.RandomResizedCrop([16, 16]), None),
(transforms.RandomRotation(degrees=30), None),
(transforms.RandomShortestSize(min_size=10), None),
(transforms.RandomVerticalFlip(p=1.0), None),
(transforms.RandomZoomOut(p=1.0), None),
(transforms.Resize([16, 16], antialias=True), None),
(transforms.ScaleJitter((16, 16), scale_range=(0.8, 1.2)), None),
(transforms.ClampBoundingBox(), None),
(transforms.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.CXCYWH), None),
(transforms.ConvertDtype(), None),
(transforms.GaussianBlur(kernel_size=3), None),
(
transforms.LinearTransformation(
# These are just dummy values that will be filled by the adapter. We can't define them upfront,
# because we neither know the spatial size nor the device at this point
transformation_matrix=torch.empty((1, 1)),
mean_vector=torch.empty((1,)),
),
linear_transformation_adapter,
),
(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), normalize_adapter),
(transforms.ToDtype(torch.float64), None),
(transforms.UniformTemporalSubsample(num_samples=2), None),
],
ids=lambda transform: type(transform).__name__,
)
@pytest.mark.parametrize("container_type", [dict, list, tuple])
@pytest.mark.parametrize(
"image_or_video",
[
make_image(),
make_video(),
next(make_pil_images(color_spaces=["RGB"])),
next(make_vanilla_tensor_images()),
],
)
@pytest.mark.parametrize("device", cpu_and_gpu())
def test_common(self, transform, adapter, container_type, image_or_video, device):
spatial_size = F.get_spatial_size(image_or_video)
input = dict(
image_or_video=image_or_video,
image_datapoint=make_image(size=spatial_size),
video_datapoint=make_video(size=spatial_size),
image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])),
bounding_box_xyxy=make_bounding_box(
format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(3,)
),
bounding_box_xywh=make_bounding_box(
format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, extra_dims=(4,)
),
bounding_box_cxcywh=make_bounding_box(
format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, extra_dims=(5,)
),
bounding_box_degenerate_xyxy=datapoints.BoundingBox(
[
[0, 0, 0, 0], # no height or width
[0, 0, 0, 1], # no height
[0, 0, 1, 0], # no width
[2, 0, 1, 1], # x1 > x2, y1 < y2
[0, 2, 1, 1], # x1 < x2, y1 > y2
[2, 2, 1, 1], # x1 > x2, y1 > y2
],
format=datapoints.BoundingBoxFormat.XYXY,
spatial_size=spatial_size,
),
bounding_box_degenerate_xywh=datapoints.BoundingBox(
[
[0, 0, 0, 0], # no height or width
[0, 0, 0, 1], # no height
[0, 0, 1, 0], # no width
[0, 0, 1, -1], # negative height
[0, 0, -1, 1], # negative width
[0, 0, -1, -1], # negative height and width
],
format=datapoints.BoundingBoxFormat.XYWH,
spatial_size=spatial_size,
),
bounding_box_degenerate_cxcywh=datapoints.BoundingBox(
[
[0, 0, 0, 0], # no height or width
[0, 0, 0, 1], # no height
[0, 0, 1, 0], # no width
[0, 0, 1, -1], # negative height
[0, 0, -1, 1], # negative width
[0, 0, -1, -1], # negative height and width
],
format=datapoints.BoundingBoxFormat.CXCYWH,
spatial_size=spatial_size,
),
detection_mask=make_detection_mask(size=spatial_size),
segmentation_mask=make_segmentation_mask(size=spatial_size),
int=0,
float=0.0,
bool=True,
none=None,
str="str",
path=pathlib.Path.cwd(),
object=object(),
tensor=torch.empty(5),
array=np.empty(5),
@parametrize(
[
(
transform,
[
dict(inpt=inpt, one_hot_label=one_hot_label)
for inpt, one_hot_label in itertools.product(
itertools.chain(
make_images(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]),
make_videos(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]),
),
make_one_hot_labels(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]),
)
],
)
if adapter is not None:
input = adapter(transform, input, device)
if container_type in {tuple, list}:
input = container_type(input.values())
input_flat, input_spec = tree_flatten(input)
input_flat = [item.to(device) if isinstance(item, torch.Tensor) else item for item in input_flat]
input = tree_unflatten(input_flat, input_spec)
torch.manual_seed(0)
output = transform(input)
output_flat, output_spec = tree_flatten(output)
assert output_spec == input_spec
for output_item, input_item, should_be_transformed in zip(
output_flat, input_flat, transforms.Transform()._needs_transform_list(input_flat)
):
if should_be_transformed:
assert type(output_item) is type(input_item)
else:
assert output_item is input_item
if isinstance(input_item, datapoints.BoundingBox) and not isinstance(
transform, transforms.ConvertBoundingBoxFormat
):
assert output_item.format == input_item.format
# Enforce that the transform does not turn a degenerate box marked by RandomIoUCrop (or any other future
# transform that does this) back into a valid one.
# TODO: we should test that against all degenerate boxes above
for format in list(datapoints.BoundingBoxFormat):
sample = dict(
boxes=datapoints.BoundingBox([[0, 0, 0, 0]], format=format, spatial_size=(224, 244)),
labels=torch.tensor([3]),
)
assert transforms.SanitizeBoundingBoxes()(sample)["boxes"].shape == (0, 4)
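# Hedged sketch, not part of this change, of what the degenerate-box check above relies on:
# SanitizeBoundingBoxes drops boxes without positive height/width and, with its default
# labels_getter, the corresponding entries of a "labels" key (exact defaults are an
# assumption about this torchvision version).
sanitize_sample = dict(
    boxes=datapoints.BoundingBox(
        [[0, 0, 10, 10], [5, 5, 5, 5]], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(32, 32)
    ),
    labels=torch.tensor([1, 2]),
)
sanitized = transforms.SanitizeBoundingBoxes()(sanitize_sample)
# sanitized["boxes"].shape == (1, 4) and sanitized["labels"].tolist() == [1]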
@parametrize(
[
(
transform,
[
dict(inpt=inpt, one_hot_label=one_hot_label)
for inpt, one_hot_label in itertools.product(
itertools.chain(
make_images(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]),
make_videos(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]),
),
make_one_hot_labels(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]),
)
],
)
for transform in [
proto_transforms.RandomMixup(alpha=1.0),
proto_transforms.RandomCutmix(alpha=1.0),
]
]
)
def test_mixup_cutmix(self, transform, input):
transform(input)
# add other data that should bypass and won't raise any error
input_copy = dict(input)
input_copy["path"] = "/path/to/somewhere"
input_copy["num"] = 1234
transform(input_copy)
# Check that we raise an error if the sample contains a bbox, mask, or label
err_msg = "does not support PIL images, bounding boxes, masks and plain labels"
input_copy = dict(input)
for unsup_data in [
make_label(),
make_bounding_box(format="XYXY"),
make_detection_mask(),
make_segmentation_mask(),
]:
input_copy["unsupported"] = unsup_data
with pytest.raises(TypeError, match=err_msg):
transform(input_copy)
@parametrize(
[
(
transform,
itertools.chain.from_iterable(
fn(
color_spaces=[
"GRAY",
"RGB",
],
dtypes=[torch.uint8],
extra_dims=[(), (4,)],
**(dict(num_frames=["random"]) if fn is make_videos else dict()),
)
for fn in [
make_images,
make_vanilla_tensor_images,
make_pil_images,
make_videos,
]
),
)
for transform in (
transforms.RandAugment(),
transforms.TrivialAugmentWide(),
transforms.AutoAugment(),
transforms.AugMix(),
)
]
)
def test_auto_augment(self, transform, input):
transform(input)
@parametrize(
[
(
transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]),
itertools.chain.from_iterable(
fn(color_spaces=["RGB"], dtypes=[torch.float32])
for fn in [
make_images,
make_vanilla_tensor_images,
make_videos,
]
),
),
for transform in [
transforms.RandomMixup(alpha=1.0),
transforms.RandomCutmix(alpha=1.0),
]
)
def test_normalize(self, transform, input):
transform(input)
@parametrize(
[
(
transforms.RandomResizedCrop([16, 16], antialias=True),
itertools.chain(
make_images(extra_dims=[(4,)]),
make_vanilla_tensor_images(),
make_pil_images(),
make_videos(extra_dims=[()]),
),
)
]
)
def test_random_resized_crop(self, transform, input):
transform(input)
@pytest.mark.parametrize(
"flat_inputs",
itertools.permutations(
[
next(make_vanilla_tensor_images()),
next(make_vanilla_tensor_images()),
next(make_pil_images()),
make_image(),
next(make_videos()),
],
3,
),
]
)
def test_simple_tensor_heuristic(flat_inputs):
def split_on_simple_tensor(to_split):
# This takes a sequence that is structurally aligned with `flat_inputs` and splits its items into three parts:
# 1. The first simple tensor. If none is present, this will be `None`
# 2. A list of the remaining simple tensors
# 3. A list of all other items
simple_tensors = []
others = []
# Splitting always happens on the original `flat_inputs` so that any erroneous type changes made by the
# transform do not affect the splitting.
for item, inpt in zip(to_split, flat_inputs):
(simple_tensors if is_simple_tensor(inpt) else others).append(item)
return simple_tensors[0] if simple_tensors else None, simple_tensors[1:], others
class CopyCloneTransform(transforms.Transform):
def _transform(self, inpt, params):
return inpt.clone() if isinstance(inpt, torch.Tensor) else inpt.copy()
@staticmethod
def was_applied(output, inpt):
identity = output is inpt
if identity:
return False
# Make sure nothing fishy is going on
assert_equal(output, inpt)
return True
first_simple_tensor_input, other_simple_tensor_inputs, other_inputs = split_on_simple_tensor(flat_inputs)
transform = CopyCloneTransform()
transformed_sample = transform(flat_inputs)
first_simple_tensor_output, other_simple_tensor_outputs, other_outputs = split_on_simple_tensor(transformed_sample)
if first_simple_tensor_input is not None:
if other_inputs:
assert not transform.was_applied(first_simple_tensor_output, first_simple_tensor_input)
else:
assert transform.was_applied(first_simple_tensor_output, first_simple_tensor_input)
for output, inpt in zip(other_simple_tensor_outputs, other_simple_tensor_inputs):
assert not transform.was_applied(output, inpt)
for input, output in zip(other_inputs, other_outputs):
assert transform.was_applied(output, input)
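# Hedged sketch, not part of this change, of the heuristic verified above: a plain tensor is
# treated as an image only when no other image-like input (datapoint Image/Video or PIL image)
# is present in the sample; otherwise it is passed through untouched.
plain = torch.rand(3, 8, 8)
flipped_plain = transforms.RandomHorizontalFlip(p=1.0)(plain)  # plain tensor alone: transformed
image = datapoints.Image(torch.rand(3, 8, 8))
passthrough, flipped_image = transforms.RandomHorizontalFlip(p=1.0)(plain, image)
# `passthrough` is the untouched plain tensor, `flipped_image` is the transformed datapoint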
@pytest.mark.parametrize("p", [0.0, 1.0])
class TestRandomHorizontalFlip:
def input_expected_image_tensor(self, p, dtype=torch.float32):
input = torch.tensor([[[0, 1], [0, 1]], [[1, 0], [1, 0]]], dtype=dtype)
expected = torch.tensor([[[1, 0], [1, 0]], [[0, 1], [0, 1]]], dtype=dtype)
return input, expected if p == 1 else input
def test_simple_tensor(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(input)
assert_equal(expected, actual)
def test_pil_image(self, p):
input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(to_pil_image(input))
assert_equal(expected, pil_to_tensor(actual))
def test_datapoints_image(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(datapoints.Image(input))
assert_equal(datapoints.Image(expected), actual)
def test_datapoints_mask(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(datapoints.Mask(input))
assert_equal(datapoints.Mask(expected), actual)
def test_datapoints_bounding_box(self, p):
input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10))
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(input)
expected_image_tensor = torch.tensor([5, 0, 10, 5]) if p == 1.0 else input
expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor)
assert_equal(expected, actual)
assert actual.format == expected.format
assert actual.spatial_size == expected.spatial_size
@pytest.mark.parametrize("p", [0.0, 1.0])
class TestRandomVerticalFlip:
def input_expected_image_tensor(self, p, dtype=torch.float32):
input = torch.tensor([[[1, 1], [0, 0]], [[1, 1], [0, 0]]], dtype=dtype)
expected = torch.tensor([[[0, 0], [1, 1]], [[0, 0], [1, 1]]], dtype=dtype)
return input, expected if p == 1 else input
def test_simple_tensor(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(input)
assert_equal(expected, actual)
def test_pil_image(self, p):
input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(to_pil_image(input))
assert_equal(expected, pil_to_tensor(actual))
def test_datapoints_image(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(datapoints.Image(input))
assert_equal(datapoints.Image(expected), actual)
def test_datapoints_mask(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(datapoints.Mask(input))
assert_equal(datapoints.Mask(expected), actual)
def test_datapoints_bounding_box(self, p):
input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10))
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(input)
expected_image_tensor = torch.tensor([0, 5, 5, 10]) if p == 1.0 else input
expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor)
assert_equal(expected, actual)
assert actual.format == expected.format
assert actual.spatial_size == expected.spatial_size
class TestPad:
def test_assertions(self):
with pytest.raises(TypeError, match="Got inappropriate padding arg"):
transforms.Pad("abc")
with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"):
transforms.Pad([-0.7, 0, 0.7])
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.Pad(12, fill="abc")
with pytest.raises(ValueError, match="Padding mode should be either"):
transforms.Pad(12, padding_mode="abc")
@pytest.mark.parametrize("padding", [1, (1, 2), [1, 2, 3, 4]])
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("padding_mode", ["constant", "edge"])
def test__transform(self, padding, fill, padding_mode, mocker):
transform = transforms.Pad(padding, fill=fill, padding_mode=padding_mode)
fn = mocker.patch("torchvision.transforms.v2.functional.pad")
inpt = mocker.MagicMock(spec=datapoints.Image)
_ = transform(inpt)
fill = transforms._utils._convert_fill_arg(fill)
if isinstance(padding, tuple):
padding = list(padding)
fn.assert_called_once_with(inpt, padding=padding, fill=fill, padding_mode=padding_mode)
@pytest.mark.parametrize("fill", [12, {datapoints.Image: 12, datapoints.Mask: 34}])
def test__transform_image_mask(self, fill, mocker):
transform = transforms.Pad(1, fill=fill, padding_mode="constant")
fn = mocker.patch("torchvision.transforms.v2.functional.pad")
image = datapoints.Image(torch.rand(3, 32, 32))
mask = datapoints.Mask(torch.randint(0, 5, size=(32, 32)))
inpt = [image, mask]
_ = transform(inpt)
if isinstance(fill, int):
fill = transforms._utils._convert_fill_arg(fill)
calls = [
mocker.call(image, padding=1, fill=fill, padding_mode="constant"),
mocker.call(mask, padding=1, fill=fill, padding_mode="constant"),
]
else:
fill_img = transforms._utils._convert_fill_arg(fill[type(image)])
fill_mask = transforms._utils._convert_fill_arg(fill[type(mask)])
calls = [
mocker.call(image, padding=1, fill=fill_img, padding_mode="constant"),
mocker.call(mask, padding=1, fill=fill_mask, padding_mode="constant"),
]
fn.assert_has_calls(calls)
class TestRandomZoomOut:
def test_assertions(self):
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomZoomOut(fill="abc")
with pytest.raises(TypeError, match="should be a sequence of length"):
transforms.RandomZoomOut(0, side_range=0)
with pytest.raises(ValueError, match="Invalid canvas side range"):
transforms.RandomZoomOut(0, side_range=[4.0, 1.0])
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]])
def test__get_params(self, fill, side_range, mocker):
transform = transforms.RandomZoomOut(fill=fill, side_range=side_range)
image = mocker.MagicMock(spec=datapoints.Image)
h, w = image.spatial_size = (24, 32)
params = transform._get_params([image])
assert len(params["padding"]) == 4
assert 0 <= params["padding"][0] <= (side_range[1] - 1) * w
assert 0 <= params["padding"][1] <= (side_range[1] - 1) * h
assert 0 <= params["padding"][2] <= (side_range[1] - 1) * w
assert 0 <= params["padding"][3] <= (side_range[1] - 1) * h
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]])
def test__transform(self, fill, side_range, mocker):
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
transform = transforms.RandomZoomOut(fill=fill, side_range=side_range, p=1)
fn = mocker.patch("torchvision.transforms.v2.functional.pad")
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
torch.rand(1) # random apply changes random state
params = transform._get_params([inpt])
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, fill=fill)
@pytest.mark.parametrize("fill", [12, {datapoints.Image: 12, datapoints.Mask: 34}])
def test__transform_image_mask(self, fill, mocker):
transform = transforms.RandomZoomOut(fill=fill, p=1.0)
fn = mocker.patch("torchvision.transforms.v2.functional.pad")
image = datapoints.Image(torch.rand(3, 32, 32))
mask = datapoints.Mask(torch.randint(0, 5, size=(32, 32)))
inpt = [image, mask]
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
torch.rand(1) # random apply changes random state
params = transform._get_params(inpt)
if isinstance(fill, int):
fill = transforms._utils._convert_fill_arg(fill)
calls = [
mocker.call(image, **params, fill=fill),
mocker.call(mask, **params, fill=fill),
]
else:
fill_img = transforms._utils._convert_fill_arg(fill[type(image)])
fill_mask = transforms._utils._convert_fill_arg(fill[type(mask)])
calls = [
mocker.call(image, **params, fill=fill_img),
mocker.call(mask, **params, fill=fill_mask),
]
fn.assert_has_calls(calls)
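# Hedged sketch, not part of this change, of the seeding pattern used by the mocker-based tests
# above: re-seeding and consuming one extra torch.rand(1) replays the coin flip of the p-based
# random apply, so _get_params yields the same parameters the transform drew internally
# (assumes this torchvision version's RNG usage).
zoom_out = transforms.RandomZoomOut(fill=0, side_range=(1.0, 4.0), p=1.0)
sample = torch.rand(3, 24, 32)
torch.manual_seed(12)
_ = zoom_out(sample)                       # consumes the random-apply draw, then draws params
torch.manual_seed(12)
torch.rand(1)                              # replay the random-apply draw
replayed_params = zoom_out._get_params([sample])  # matches the params used in the call above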
class TestRandomRotation:
def test_assertions(self):
with pytest.raises(ValueError, match="is a single number, it must be positive"):
transforms.RandomRotation(-0.7)
for d in [[-0.7], [-0.7, 0, 0.7]]:
with pytest.raises(ValueError, match="degrees should be a sequence of length 2"):
transforms.RandomRotation(d)
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomRotation(12, fill="abc")
with pytest.raises(TypeError, match="center should be a sequence of length"):
transforms.RandomRotation(12, center=12)
with pytest.raises(ValueError, match="center should be a sequence of length"):
transforms.RandomRotation(12, center=[1, 2, 3])
def test__get_params(self):
angle_bound = 34
transform = transforms.RandomRotation(angle_bound)
params = transform._get_params(None)
assert -angle_bound <= params["angle"] <= angle_bound
angle_bounds = [12, 34]
transform = transforms.RandomRotation(angle_bounds)
params = transform._get_params(None)
assert angle_bounds[0] <= params["angle"] <= angle_bounds[1]
@pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
@pytest.mark.parametrize("expand", [False, True])
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("center", [None, [2.0, 3.0]])
def test__transform(self, degrees, expand, fill, center, mocker):
interpolation = InterpolationMode.BILINEAR
transform = transforms.RandomRotation(
degrees, interpolation=interpolation, expand=expand, fill=fill, center=center
)
if isinstance(degrees, (tuple, list)):
assert transform.degrees == [float(degrees[0]), float(degrees[1])]
else:
assert transform.degrees == [float(-degrees), float(degrees)]
fn = mocker.patch("torchvision.transforms.v2.functional.rotate")
inpt = mocker.MagicMock(spec=datapoints.Image)
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
params = transform._get_params(inpt)
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, interpolation=interpolation, expand=expand, fill=fill, center=center)
@pytest.mark.parametrize("angle", [34, -87])
@pytest.mark.parametrize("expand", [False, True])
def test_boundingbox_spatial_size(self, angle, expand):
# Specific test for BoundingBox.rotate
bbox = datapoints.BoundingBox(
torch.tensor([1, 2, 3, 4]), format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(32, 32)
)
img = datapoints.Image(torch.rand(1, 3, 32, 32))
out_img = img.rotate(angle, expand=expand)
out_bbox = bbox.rotate(angle, expand=expand)
assert out_img.spatial_size == out_bbox.spatial_size
class TestRandomAffine:
def test_assertions(self):
with pytest.raises(ValueError, match="is a single number, it must be positive"):
transforms.RandomAffine(-0.7)
for d in [[-0.7], [-0.7, 0, 0.7]]:
with pytest.raises(ValueError, match="degrees should be a sequence of length 2"):
transforms.RandomAffine(d)
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomAffine(12, fill="abc")
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomAffine(12, fill="abc")
for kwargs in [
{"center": 12},
{"translate": 12},
{"scale": 12},
]:
with pytest.raises(TypeError, match="should be a sequence of length"):
transforms.RandomAffine(12, **kwargs)
for kwargs in [{"center": [1, 2, 3]}, {"translate": [1, 2, 3]}, {"scale": [1, 2, 3]}]:
with pytest.raises(ValueError, match="should be a sequence of length"):
transforms.RandomAffine(12, **kwargs)
with pytest.raises(ValueError, match="translation values should be between 0 and 1"):
transforms.RandomAffine(12, translate=[-1.0, 2.0])
with pytest.raises(ValueError, match="scale values should be positive"):
transforms.RandomAffine(12, scale=[-1.0, 2.0])
with pytest.raises(ValueError, match="is a single number, it must be positive"):
transforms.RandomAffine(12, shear=-10)
for s in [[-0.7], [-0.7, 0, 0.7]]:
with pytest.raises(ValueError, match="shear should be a sequence of length 2"):
transforms.RandomAffine(12, shear=s)
@pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
@pytest.mark.parametrize("translate", [None, [0.1, 0.2]])
@pytest.mark.parametrize("scale", [None, [0.7, 1.2]])
@pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]])
def test__get_params(self, degrees, translate, scale, shear, mocker):
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
h, w = image.spatial_size
transform = transforms.RandomAffine(degrees, translate=translate, scale=scale, shear=shear)
params = transform._get_params([image])
if not isinstance(degrees, (list, tuple)):
assert -degrees <= params["angle"] <= degrees
else:
assert degrees[0] <= params["angle"] <= degrees[1]
if translate is not None:
w_max = int(round(translate[0] * w))
h_max = int(round(translate[1] * h))
assert -w_max <= params["translate"][0] <= w_max
assert -h_max <= params["translate"][1] <= h_max
else:
assert params["translate"] == (0, 0)
if scale is not None:
assert scale[0] <= params["scale"] <= scale[1]
else:
assert params["scale"] == 1.0
if shear is not None:
if isinstance(shear, float):
assert -shear <= params["shear"][0] <= shear
assert params["shear"][1] == 0.0
elif len(shear) == 2:
assert shear[0] <= params["shear"][0] <= shear[1]
assert params["shear"][1] == 0.0
else:
assert shear[0] <= params["shear"][0] <= shear[1]
assert shear[2] <= params["shear"][1] <= shear[3]
else:
assert params["shear"] == (0, 0)
@pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
@pytest.mark.parametrize("translate", [None, [0.1, 0.2]])
@pytest.mark.parametrize("scale", [None, [0.7, 1.2]])
@pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]])
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("center", [None, [2.0, 3.0]])
def test__transform(self, degrees, translate, scale, shear, fill, center, mocker):
interpolation = InterpolationMode.BILINEAR
transform = transforms.RandomAffine(
degrees,
translate=translate,
scale=scale,
shear=shear,
interpolation=interpolation,
fill=fill,
center=center,
)
if isinstance(degrees, (tuple, list)):
assert transform.degrees == [float(degrees[0]), float(degrees[1])]
else:
assert transform.degrees == [float(-degrees), float(degrees)]
fn = mocker.patch("torchvision.transforms.v2.functional.affine")
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
params = transform._get_params([inpt])
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, interpolation=interpolation, fill=fill, center=center)
class TestRandomCrop:
def test_assertions(self):
with pytest.raises(ValueError, match="Please provide only two dimensions"):
transforms.RandomCrop([10, 12, 14])
with pytest.raises(TypeError, match="Got inappropriate padding arg"):
transforms.RandomCrop([10, 12], padding="abc")
with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"):
transforms.RandomCrop([10, 12], padding=[-0.7, 0, 0.7])
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomCrop([10, 12], padding=1, fill="abc")
with pytest.raises(ValueError, match="Padding mode should be either"):
transforms.RandomCrop([10, 12], padding=1, padding_mode="abc")
@pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]])
@pytest.mark.parametrize("size, pad_if_needed", [((10, 10), False), ((50, 25), True)])
def test__get_params(self, padding, pad_if_needed, size, mocker):
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
h, w = image.spatial_size
transform = transforms.RandomCrop(size, padding=padding, pad_if_needed=pad_if_needed)
params = transform._get_params([image])
if padding is not None:
if isinstance(padding, int):
pad_top = pad_bottom = pad_left = pad_right = padding
elif isinstance(padding, list) and len(padding) == 2:
pad_left = pad_right = padding[0]
pad_top = pad_bottom = padding[1]
elif isinstance(padding, list) and len(padding) == 4:
pad_left, pad_top, pad_right, pad_bottom = padding
h += pad_top + pad_bottom
w += pad_left + pad_right
else:
pad_left = pad_right = pad_top = pad_bottom = 0
if pad_if_needed:
if w < size[1]:
diff = size[1] - w
pad_left += diff
pad_right += diff
w += 2 * diff
if h < size[0]:
diff = size[0] - h
pad_top += diff
pad_bottom += diff
h += 2 * diff
padding = [pad_left, pad_top, pad_right, pad_bottom]
assert 0 <= params["top"] <= h - size[0] + 1
assert 0 <= params["left"] <= w - size[1] + 1
assert params["height"] == size[0]
assert params["width"] == size[1]
assert params["needs_pad"] is any(padding)
assert params["padding"] == padding
@pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]])
@pytest.mark.parametrize("pad_if_needed", [False, True])
@pytest.mark.parametrize("fill", [False, True])
@pytest.mark.parametrize("padding_mode", ["constant", "edge"])
def test__transform(self, padding, pad_if_needed, fill, padding_mode, mocker):
output_size = [10, 12]
transform = transforms.RandomCrop(
output_size, padding=padding, pad_if_needed=pad_if_needed, fill=fill, padding_mode=padding_mode
)
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (32, 32)
expected = mocker.MagicMock(spec=datapoints.Image)
expected.num_channels = 3
if isinstance(padding, int):
expected.spatial_size = (inpt.spatial_size[0] + padding, inpt.spatial_size[1] + padding)
elif isinstance(padding, list):
expected.spatial_size = (
inpt.spatial_size[0] + sum(padding[0::2]),
inpt.spatial_size[1] + sum(padding[1::2]),
)
else:
expected.spatial_size = inpt.spatial_size
_ = mocker.patch("torchvision.transforms.v2.functional.pad", return_value=expected)
fn_crop = mocker.patch("torchvision.transforms.v2.functional.crop")
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
params = transform._get_params([inpt])
if padding is None and not pad_if_needed:
fn_crop.assert_called_once_with(
inpt, top=params["top"], left=params["left"], height=output_size[0], width=output_size[1]
)
elif not pad_if_needed:
fn_crop.assert_called_once_with(
expected, top=params["top"], left=params["left"], height=output_size[0], width=output_size[1]
)
elif padding is None:
# vfdev-5: I do not know how to mock and test this case
pass
else:
# vfdev-5: I do not know how to mock and test this case
pass
class TestGaussianBlur:
def test_assertions(self):
with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"):
transforms.GaussianBlur([10, 12, 14])
with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"):
transforms.GaussianBlur(4)
with pytest.raises(
TypeError, match="sigma should be a single int or float or a list/tuple with length 2 floats."
):
transforms.GaussianBlur(3, sigma=[1, 2, 3])
with pytest.raises(ValueError, match="If sigma is a single number, it must be positive"):
transforms.GaussianBlur(3, sigma=-1.0)
with pytest.raises(ValueError, match="sigma values should be positive and of the form"):
transforms.GaussianBlur(3, sigma=[2.0, 1.0])
@pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0]])
def test__get_params(self, sigma):
transform = transforms.GaussianBlur(3, sigma=sigma)
params = transform._get_params([])
if isinstance(sigma, float):
assert params["sigma"][0] == params["sigma"][1] == 10
else:
assert sigma[0] <= params["sigma"][0] <= sigma[1]
assert sigma[0] <= params["sigma"][1] <= sigma[1]
@pytest.mark.parametrize("kernel_size", [3, [3, 5], (5, 3)])
@pytest.mark.parametrize("sigma", [2.0, [2.0, 3.0]])
def test__transform(self, kernel_size, sigma, mocker):
transform = transforms.GaussianBlur(kernel_size=kernel_size, sigma=sigma)
if isinstance(kernel_size, (tuple, list)):
assert transform.kernel_size == kernel_size
else:
kernel_size = (kernel_size, kernel_size)
assert transform.kernel_size == kernel_size
if isinstance(sigma, (tuple, list)):
assert transform.sigma == sigma
else:
assert transform.sigma == [sigma, sigma]
fn = mocker.patch("torchvision.transforms.v2.functional.gaussian_blur")
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
params = transform._get_params([inpt])
fn.assert_called_once_with(inpt, kernel_size, **params)
class TestRandomColorOp:
@pytest.mark.parametrize("p", [0.0, 1.0])
@pytest.mark.parametrize(
"transform_cls, func_op_name, kwargs",
[
(transforms.RandomEqualize, "equalize", {}),
(transforms.RandomInvert, "invert", {}),
(transforms.RandomAutocontrast, "autocontrast", {}),
(transforms.RandomPosterize, "posterize", {"bits": 4}),
(transforms.RandomSolarize, "solarize", {"threshold": 0.5}),
(transforms.RandomAdjustSharpness, "adjust_sharpness", {"sharpness_factor": 0.5}),
],
)
def test__transform(self, p, transform_cls, func_op_name, kwargs, mocker):
transform = transform_cls(p=p, **kwargs)
fn = mocker.patch(f"torchvision.transforms.v2.functional.{func_op_name}")
inpt = mocker.MagicMock(spec=datapoints.Image)
_ = transform(inpt)
if p > 0.0:
fn.assert_called_once_with(inpt, **kwargs)
else:
assert fn.call_count == 0
class TestRandomPerspective:
def test_assertions(self):
with pytest.raises(ValueError, match="Argument distortion_scale value should be between 0 and 1"):
transforms.RandomPerspective(distortion_scale=-1.0)
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomPerspective(0.5, fill="abc")
def test__get_params(self, mocker):
dscale = 0.5
transform = transforms.RandomPerspective(dscale)
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
params = transform._get_params([image])
h, w = image.spatial_size
assert "coefficients" in params
assert len(params["coefficients"]) == 8
@pytest.mark.parametrize("distortion_scale", [0.1, 0.7])
def test__transform(self, distortion_scale, mocker):
interpolation = InterpolationMode.BILINEAR
fill = 12
transform = transforms.RandomPerspective(distortion_scale, fill=fill, interpolation=interpolation)
fn = mocker.patch("torchvision.transforms.v2.functional.perspective")
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
torch.rand(1) # random apply changes random state
params = transform._get_params([inpt])
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, None, None, **params, fill=fill, interpolation=interpolation)
class TestElasticTransform:
def test_assertions(self):
with pytest.raises(TypeError, match="alpha should be float or a sequence of floats"):
transforms.ElasticTransform({})
with pytest.raises(ValueError, match="alpha is a sequence its length should be one of 2"):
transforms.ElasticTransform([1.0, 2.0, 3.0])
with pytest.raises(ValueError, match="alpha should be a sequence of floats"):
transforms.ElasticTransform([1, 2])
with pytest.raises(TypeError, match="sigma should be float or a sequence of floats"):
transforms.ElasticTransform(1.0, {})
with pytest.raises(ValueError, match="sigma is a sequence its length should be one of 2"):
transforms.ElasticTransform(1.0, [1.0, 2.0, 3.0])
with pytest.raises(ValueError, match="sigma should be a sequence of floats"):
transforms.ElasticTransform(1.0, [1, 2])
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.ElasticTransform(1.0, 2.0, fill="abc")
def test__get_params(self, mocker):
alpha = 2.0
sigma = 3.0
transform = transforms.ElasticTransform(alpha, sigma)
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
params = transform._get_params([image])
h, w = image.spatial_size
displacement = params["displacement"]
assert displacement.shape == (1, h, w, 2)
assert (-alpha / w <= displacement[0, ..., 0]).all() and (displacement[0, ..., 0] <= alpha / w).all()
assert (-alpha / h <= displacement[0, ..., 1]).all() and (displacement[0, ..., 1] <= alpha / h).all()
@pytest.mark.parametrize("alpha", [5.0, [5.0, 10.0]])
@pytest.mark.parametrize("sigma", [2.0, [2.0, 5.0]])
def test__transform(self, alpha, sigma, mocker):
interpolation = InterpolationMode.BILINEAR
fill = 12
transform = transforms.ElasticTransform(alpha, sigma=sigma, fill=fill, interpolation=interpolation)
if isinstance(alpha, float):
assert transform.alpha == [alpha, alpha]
else:
assert transform.alpha == alpha
if isinstance(sigma, float):
assert transform.sigma == [sigma, sigma]
else:
assert transform.sigma == sigma
fn = mocker.patch("torchvision.transforms.v2.functional.elastic")
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
# Let's mock transform._get_params to control the output:
transform._get_params = mocker.MagicMock()
_ = transform(inpt)
params = transform._get_params([inpt])
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)
class TestRandomErasing:
def test_assertions(self, mocker):
with pytest.raises(TypeError, match="Argument value should be either a number or str or a sequence"):
transforms.RandomErasing(value={})
with pytest.raises(ValueError, match="If value is str, it should be 'random'"):
transforms.RandomErasing(value="abc")
with pytest.raises(TypeError, match="Scale should be a sequence"):
transforms.RandomErasing(scale=123)
with pytest.raises(TypeError, match="Ratio should be a sequence"):
transforms.RandomErasing(ratio=123)
with pytest.raises(ValueError, match="Scale should be between 0 and 1"):
transforms.RandomErasing(scale=[-1, 2])
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
transform = transforms.RandomErasing(value=[1, 2, 3, 4])
with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"):
transform._get_params([image])
@pytest.mark.parametrize("value", [5.0, [1, 2, 3], "random"])
def test__get_params(self, value, mocker):
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
transform = transforms.RandomErasing(value=value)
params = transform._get_params([image])
v = params["v"]
h, w = params["h"], params["w"]
i, j = params["i"], params["j"]
assert isinstance(v, torch.Tensor)
if value == "random":
assert v.shape == (image.num_channels, h, w)
elif isinstance(value, (int, float)):
assert v.shape == (1, 1, 1)
elif isinstance(value, (list, tuple)):
assert v.shape == (image.num_channels, 1, 1)
assert 0 <= i <= image.spatial_size[0] - h
assert 0 <= j <= image.spatial_size[1] - w
@pytest.mark.parametrize("p", [0, 1])
def test__transform(self, mocker, p):
transform = transforms.RandomErasing(p=p)
transform._transformed_types = (mocker.MagicMock,)
i_sentinel = mocker.MagicMock()
j_sentinel = mocker.MagicMock()
h_sentinel = mocker.MagicMock()
w_sentinel = mocker.MagicMock()
v_sentinel = mocker.MagicMock()
mocker.patch(
"torchvision.transforms.v2._augment.RandomErasing._get_params",
return_value=dict(i=i_sentinel, j=j_sentinel, h=h_sentinel, w=w_sentinel, v=v_sentinel),
)
inpt_sentinel = mocker.MagicMock()
mock = mocker.patch("torchvision.transforms.v2._augment.F.erase")
output = transform(inpt_sentinel)
if p:
mock.assert_called_once_with(
inpt_sentinel,
i=i_sentinel,
j=j_sentinel,
h=h_sentinel,
w=w_sentinel,
v=v_sentinel,
inplace=transform.inplace,
)
else:
mock.assert_not_called()
assert output is inpt_sentinel
class TestTransform:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test_check_transformed_types(self, inpt_type, mocker):
# This test ensures that we correctly handle which types to transform and which to bypass
t = transforms.Transform()
inpt = mocker.MagicMock(spec=inpt_type)
if inpt_type in (np.ndarray, str, int):
output = t(inpt)
assert output is inpt
else:
with pytest.raises(NotImplementedError):
t(inpt)
class TestToImageTensor:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test__transform(self, inpt_type, mocker):
fn = mocker.patch(
"torchvision.transforms.v2.functional.to_image_tensor",
return_value=torch.rand(1, 3, 8, 8),
)
inpt = mocker.MagicMock(spec=inpt_type)
transform = transforms.ToImageTensor()
transform(inpt)
if inpt_type in (datapoints.BoundingBox, datapoints.Image, str, int):
assert fn.call_count == 0
else:
fn.assert_called_once_with(inpt)
class TestToImagePIL:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test__transform(self, inpt_type, mocker):
fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil")
inpt = mocker.MagicMock(spec=inpt_type)
transform = transforms.ToImagePIL()
transform(inpt)
if inpt_type in (datapoints.BoundingBox, PIL.Image.Image, str, int):
assert fn.call_count == 0
else:
fn.assert_called_once_with(inpt, mode=transform.mode)
class TestToPILImage:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test__transform(self, inpt_type, mocker):
fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil")
inpt = mocker.MagicMock(spec=inpt_type)
transform = transforms.ToPILImage()
transform(inpt)
if inpt_type in (PIL.Image.Image, datapoints.BoundingBox, str, int):
assert fn.call_count == 0
else:
fn.assert_called_once_with(inpt, mode=transform.mode)
class TestToTensor:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test__transform(self, inpt_type, mocker):
fn = mocker.patch("torchvision.transforms.functional.to_tensor")
inpt = mocker.MagicMock(spec=inpt_type)
with pytest.warns(UserWarning, match="deprecated and will be removed"):
transform = transforms.ToTensor()
transform(inpt)
if inpt_type in (datapoints.Image, torch.Tensor, datapoints.BoundingBox, str, int):
assert fn.call_count == 0
else:
fn.assert_called_once_with(inpt)
class TestContainers:
@pytest.mark.parametrize("transform_cls", [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder])
def test_assertions(self, transform_cls):
with pytest.raises(TypeError, match="Argument transforms should be a sequence of callables"):
transform_cls(transforms.RandomCrop(28))
@pytest.mark.parametrize("transform_cls", [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder])
@pytest.mark.parametrize(
"trfms",
[
[transforms.Pad(2), transforms.RandomCrop(28)],
[lambda x: 2.0 * x, transforms.Pad(2), transforms.RandomCrop(28)],
[transforms.Pad(2), lambda x: 2.0 * x, transforms.RandomCrop(28)],
],
)
def test_ctor(self, transform_cls, trfms):
c = transform_cls(trfms)
inpt = torch.rand(1, 3, 32, 32)
output = c(inpt)
assert isinstance(output, torch.Tensor)
assert output.ndim == 4
class TestRandomChoice:
def test_assertions(self):
with pytest.warns(UserWarning, match="Argument p is deprecated and will be removed"):
transforms.RandomChoice([transforms.Pad(2), transforms.RandomCrop(28)], p=[1, 2])
with pytest.raises(ValueError, match="The number of probabilities doesn't match the number of transforms"):
transforms.RandomChoice([transforms.Pad(2), transforms.RandomCrop(28)], probabilities=[1])
class TestRandomIoUCrop:
@pytest.mark.parametrize("device", cpu_and_gpu())
@pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]])
def test__get_params(self, device, options, mocker):
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
bboxes = datapoints.BoundingBox(
torch.tensor([[1, 1, 10, 10], [20, 20, 23, 23], [1, 20, 10, 23], [20, 1, 23, 10]]),
format="XYXY",
spatial_size=image.spatial_size,
device=device,
)
sample = [image, bboxes]
transform = transforms.RandomIoUCrop(sampler_options=options)
n_samples = 5
for _ in range(n_samples):
params = transform._get_params(sample)
if options == [2.0]:
assert len(params) == 0
return
assert len(params["is_within_crop_area"]) > 0
assert params["is_within_crop_area"].dtype == torch.bool
orig_h = image.spatial_size[0]
orig_w = image.spatial_size[1]
assert int(transform.min_scale * orig_h) <= params["height"] <= int(transform.max_scale * orig_h)
assert int(transform.min_scale * orig_w) <= params["width"] <= int(transform.max_scale * orig_w)
left, top = params["left"], params["top"]
new_h, new_w = params["height"], params["width"]
ious = box_iou(
bboxes,
torch.tensor([[left, top, left + new_w, top + new_h]], dtype=bboxes.dtype, device=bboxes.device),
)
assert ious.max() >= options[0] or ious.max() >= options[1], f"{ious} vs {options}"
def test__transform_empty_params(self, mocker):
transform = transforms.RandomIoUCrop(sampler_options=[2.0])
image = datapoints.Image(torch.rand(1, 3, 4, 4))
bboxes = datapoints.BoundingBox(torch.tensor([[1, 1, 2, 2]]), format="XYXY", spatial_size=(4, 4))
label = proto_datapoints.Label(torch.tensor([1]))
sample = [image, bboxes, label]
# Let's mock transform._get_params to control the output:
transform._get_params = mocker.MagicMock(return_value={})
output = transform(sample)
torch.testing.assert_close(output, sample)
def test_forward_assertion(self):
transform = transforms.RandomIoUCrop()
with pytest.raises(
TypeError,
match="requires input sample to contain tensor or PIL images and bounding boxes",
):
transform(torch.tensor(0))
def test__transform(self, mocker):
transform = transforms.RandomIoUCrop()
image = datapoints.Image(torch.rand(3, 32, 24))
bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), extra_dims=(6,))
masks = make_detection_mask((32, 24), num_objects=6)
sample = [image, bboxes, masks]
fn = mocker.patch("torchvision.transforms.v2.functional.crop", side_effect=lambda x, **params: x)
is_within_crop_area = torch.tensor([0, 1, 0, 1, 0, 1], dtype=torch.bool)
params = dict(top=1, left=2, height=12, width=12, is_within_crop_area=is_within_crop_area)
transform._get_params = mocker.MagicMock(return_value=params)
output = transform(sample)
assert fn.call_count == 3
expected_calls = [
mocker.call(image, top=params["top"], left=params["left"], height=params["height"], width=params["width"]),
mocker.call(bboxes, top=params["top"], left=params["left"], height=params["height"], width=params["width"]),
mocker.call(masks, top=params["top"], left=params["left"], height=params["height"], width=params["width"]),
]
fn.assert_has_calls(expected_calls)
# check number of bboxes vs number of labels:
output_bboxes = output[1]
assert isinstance(output_bboxes, datapoints.BoundingBox)
assert (output_bboxes[~is_within_crop_area] == 0).all()
output_masks = output[2]
assert isinstance(output_masks, datapoints.Mask)
class TestScaleJitter:
def test__get_params(self, mocker):
spatial_size = (24, 32)
target_size = (16, 12)
scale_range = (0.5, 1.5)
transform = transforms.ScaleJitter(target_size=target_size, scale_range=scale_range)
sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size)
n_samples = 5
for _ in range(n_samples):
params = transform._get_params([sample])
assert "size" in params
size = params["size"]
assert isinstance(size, tuple) and len(size) == 2
height, width = size
r_min = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[0]
r_max = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[1]
assert int(spatial_size[0] * r_min) <= height <= int(spatial_size[0] * r_max)
assert int(spatial_size[1] * r_min) <= width <= int(spatial_size[1] * r_max)
def test__transform(self, mocker):
interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode)
antialias_sentinel = mocker.MagicMock()
transform = transforms.ScaleJitter(
target_size=(16, 12), interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
transform._transformed_types = (mocker.MagicMock,)
size_sentinel = mocker.MagicMock()
mocker.patch(
"torchvision.transforms.v2._geometry.ScaleJitter._get_params", return_value=dict(size=size_sentinel)
)
inpt_sentinel = mocker.MagicMock()
mock = mocker.patch("torchvision.transforms.v2._geometry.F.resize")
transform(inpt_sentinel)
mock.assert_called_once_with(
inpt_sentinel, size=size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
class TestRandomShortestSize:
@pytest.mark.parametrize("min_size,max_size", [([5, 9], 20), ([5, 9], None)])
def test__get_params(self, min_size, max_size, mocker):
spatial_size = (3, 10)
transform = transforms.RandomShortestSize(min_size=min_size, max_size=max_size)
sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size)
params = transform._get_params([sample])
assert "size" in params
size = params["size"]
assert isinstance(size, tuple) and len(size) == 2
longer = max(size)
shorter = min(size)
if max_size is not None:
assert longer <= max_size
assert shorter <= max_size
else:
assert shorter in min_size
def test__transform(self, mocker):
interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode)
antialias_sentinel = mocker.MagicMock()
transform = transforms.RandomShortestSize(
min_size=[3, 5, 7], max_size=12, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
transform._transformed_types = (mocker.MagicMock,)
size_sentinel = mocker.MagicMock()
mocker.patch(
"torchvision.transforms.v2._geometry.RandomShortestSize._get_params",
return_value=dict(size=size_sentinel),
)
inpt_sentinel = mocker.MagicMock()
mock = mocker.patch("torchvision.transforms.v2._geometry.F.resize")
transform(inpt_sentinel)
mock.assert_called_once_with(
inpt_sentinel, size=size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
def test_mixup_cutmix(transform, input):
transform(input)
input_copy = dict(input)
input_copy["path"] = "/path/to/somewhere"
input_copy["num"] = 1234
transform(input_copy)
# Check that an error is raised if the sample contains a bbox, mask, or label
err_msg = "does not support PIL images, bounding boxes, masks and plain labels"
input_copy = dict(input)
for unsup_data in [
make_label(),
make_bounding_box(format="XYXY"),
make_detection_mask(),
make_segmentation_mask(),
]:
input_copy["unsupported"] = unsup_data
with pytest.raises(TypeError, match=err_msg):
transform(input_copy)
class TestSimpleCopyPaste:
......@@ -1617,27 +94,27 @@ class TestSimpleCopyPaste:
return mocker.MagicMock(spec=image_type)
def test__extract_image_targets_assertion(self, mocker):
transform = proto_transforms.SimpleCopyPaste()
transform = transforms.SimpleCopyPaste()
flat_sample = [
# images, batch size = 2
self.create_fake_image(mocker, datapoints.Image),
self.create_fake_image(mocker, Image),
# labels, bboxes, masks
mocker.MagicMock(spec=proto_datapoints.Label),
mocker.MagicMock(spec=datapoints.BoundingBox),
mocker.MagicMock(spec=datapoints.Mask),
mocker.MagicMock(spec=datapoints.Label),
mocker.MagicMock(spec=BoundingBox),
mocker.MagicMock(spec=Mask),
# labels, bboxes, masks
mocker.MagicMock(spec=datapoints.BoundingBox),
mocker.MagicMock(spec=datapoints.Mask),
mocker.MagicMock(spec=BoundingBox),
mocker.MagicMock(spec=Mask),
]
with pytest.raises(TypeError, match="requires input sample to contain equal sized list of Images"):
transform._extract_image_targets(flat_sample)
@pytest.mark.parametrize("image_type", [datapoints.Image, PIL.Image.Image, torch.Tensor])
@pytest.mark.parametrize("label_type", [proto_datapoints.Label, proto_datapoints.OneHotLabel])
@pytest.mark.parametrize("image_type", [Image, PIL.Image.Image, torch.Tensor])
@pytest.mark.parametrize("label_type", [datapoints.Label, datapoints.OneHotLabel])
def test__extract_image_targets(self, image_type, label_type, mocker):
transform = proto_transforms.SimpleCopyPaste()
transform = transforms.SimpleCopyPaste()
flat_sample = [
# images, batch size = 2
......@@ -1645,12 +122,12 @@ class TestSimpleCopyPaste:
self.create_fake_image(mocker, image_type),
# labels, bboxes, masks
mocker.MagicMock(spec=label_type),
mocker.MagicMock(spec=datapoints.BoundingBox),
mocker.MagicMock(spec=datapoints.Mask),
mocker.MagicMock(spec=BoundingBox),
mocker.MagicMock(spec=Mask),
# labels, bboxes, masks
mocker.MagicMock(spec=label_type),
mocker.MagicMock(spec=datapoints.BoundingBox),
mocker.MagicMock(spec=datapoints.Mask),
mocker.MagicMock(spec=BoundingBox),
mocker.MagicMock(spec=Mask),
]
images, targets = transform._extract_image_targets(flat_sample)
......@@ -1665,15 +142,15 @@ class TestSimpleCopyPaste:
for target in targets:
for key, type_ in [
("boxes", datapoints.BoundingBox),
("masks", datapoints.Mask),
("boxes", BoundingBox),
("masks", Mask),
("labels", label_type),
]:
assert key in target
assert isinstance(target[key], type_)
assert target[key] in flat_sample
@pytest.mark.parametrize("label_type", [proto_datapoints.Label, proto_datapoints.OneHotLabel])
@pytest.mark.parametrize("label_type", [datapoints.Label, datapoints.OneHotLabel])
def test__copy_paste(self, label_type):
image = 2 * torch.ones(3, 32, 32)
masks = torch.zeros(2, 32, 32)
......@@ -1683,13 +160,13 @@ class TestSimpleCopyPaste:
blending = True
resize_interpolation = InterpolationMode.BILINEAR
antialias = None
if label_type == proto_datapoints.OneHotLabel:
if label_type == datapoints.OneHotLabel:
labels = torch.nn.functional.one_hot(labels, num_classes=5)
target = {
"boxes": datapoints.BoundingBox(
"boxes": BoundingBox(
torch.tensor([[2.0, 3.0, 8.0, 9.0], [20.0, 20.0, 30.0, 30.0]]), format="XYXY", spatial_size=(32, 32)
),
"masks": datapoints.Mask(masks),
"masks": Mask(masks),
"labels": label_type(labels),
}
......@@ -1698,17 +175,17 @@ class TestSimpleCopyPaste:
paste_masks[0, 13:19, 12:18] = 1
paste_masks[1, 15:19, 1:8] = 1
paste_labels = torch.tensor([3, 4])
if label_type == proto_datapoints.OneHotLabel:
if label_type == datapoints.OneHotLabel:
paste_labels = torch.nn.functional.one_hot(paste_labels, num_classes=5)
paste_target = {
"boxes": datapoints.BoundingBox(
"boxes": BoundingBox(
torch.tensor([[12.0, 13.0, 19.0, 18.0], [1.0, 15.0, 8.0, 19.0]]), format="XYXY", spatial_size=(32, 32)
),
"masks": datapoints.Mask(paste_masks),
"masks": Mask(paste_masks),
"labels": label_type(paste_labels),
}
transform = proto_transforms.SimpleCopyPaste()
transform = transforms.SimpleCopyPaste()
random_selection = torch.tensor([0, 1])
output_image, output_target = transform._copy_paste(
image, target, paste_image, paste_target, random_selection, blending, resize_interpolation, antialias
......@@ -1720,7 +197,7 @@ class TestSimpleCopyPaste:
torch.testing.assert_close(output_target["boxes"][2:, :], paste_target["boxes"])
expected_labels = torch.tensor([1, 2, 3, 4])
if label_type == proto_datapoints.OneHotLabel:
if label_type == datapoints.OneHotLabel:
expected_labels = torch.nn.functional.one_hot(expected_labels, num_classes=5)
torch.testing.assert_close(output_target["labels"], label_type(expected_labels))
......@@ -1735,13 +212,11 @@ class TestFixedSizeCrop:
batch_shape = (10,)
spatial_size = (11, 5)
transform = proto_transforms.FixedSizeCrop(size=crop_size)
transform = transforms.FixedSizeCrop(size=crop_size)
flat_inputs = [
make_image(size=spatial_size, color_space="RGB"),
make_bounding_box(
format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape
),
make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape),
]
params = transform._get_params(flat_inputs)
......@@ -1763,7 +238,7 @@ class TestFixedSizeCrop:
fill_sentinel = 12
padding_mode_sentinel = mocker.MagicMock()
transform = proto_transforms.FixedSizeCrop((-1, -1), fill=fill_sentinel, padding_mode=padding_mode_sentinel)
transform = transforms.FixedSizeCrop((-1, -1), fill=fill_sentinel, padding_mode=padding_mode_sentinel)
transform._transformed_types = (mocker.MagicMock,)
mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True)
......@@ -1837,12 +312,12 @@ class TestFixedSizeCrop:
)
bounding_boxes = make_bounding_box(
format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
)
masks = make_detection_mask(size=spatial_size, extra_dims=(batch_size,))
labels = make_label(extra_dims=(batch_size,))
transform = proto_transforms.FixedSizeCrop((-1, -1))
transform = transforms.FixedSizeCrop((-1, -1))
mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True)
output = transform(
......@@ -1875,11 +350,11 @@ class TestFixedSizeCrop:
)
bounding_box = make_bounding_box(
format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
)
mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box")
transform = proto_transforms.FixedSizeCrop((-1, -1))
transform = transforms.FixedSizeCrop((-1, -1))
mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True)
transform(bounding_box)
......@@ -1887,178 +362,48 @@ class TestFixedSizeCrop:
mock.assert_called_once()
class TestLinearTransformation:
def test_assertions(self):
with pytest.raises(ValueError, match="transformation_matrix should be square"):
transforms.LinearTransformation(torch.rand(2, 3), torch.rand(5))
with pytest.raises(ValueError, match="mean_vector should have the same length"):
transforms.LinearTransformation(torch.rand(3, 3), torch.rand(5))
@pytest.mark.parametrize(
"inpt",
[
122 * torch.ones(1, 3, 8, 8),
122.0 * torch.ones(1, 3, 8, 8),
datapoints.Image(122 * torch.ones(1, 3, 8, 8)),
PIL.Image.new("RGB", (8, 8), (122, 122, 122)),
],
)
def test__transform(self, inpt):
v = 121 * torch.ones(3 * 8 * 8)
m = torch.ones(3 * 8 * 8, 3 * 8 * 8)
transform = transforms.LinearTransformation(m, v)
if isinstance(inpt, PIL.Image.Image):
with pytest.raises(TypeError, match="LinearTransformation does not work on PIL Images"):
transform(inpt)
else:
output = transform(inpt)
assert isinstance(output, torch.Tensor)
assert output.unique() == 3 * 8 * 8
assert output.dtype == inpt.dtype
class TestLabelToOneHot:
def test__transform(self):
categories = ["apple", "pear", "pineapple"]
labels = proto_datapoints.Label(torch.tensor([0, 1, 2, 1]), categories=categories)
transform = proto_transforms.LabelToOneHot()
labels = datapoints.Label(torch.tensor([0, 1, 2, 1]), categories=categories)
transform = transforms.LabelToOneHot()
ohe_labels = transform(labels)
assert isinstance(ohe_labels, proto_datapoints.OneHotLabel)
assert isinstance(ohe_labels, datapoints.OneHotLabel)
assert ohe_labels.shape == (4, 3)
assert ohe_labels.categories == labels.categories == categories
class TestRandomResize:
def test__get_params(self):
min_size = 3
max_size = 6
transform = transforms.RandomResize(min_size=min_size, max_size=max_size)
for _ in range(10):
params = transform._get_params([])
assert isinstance(params["size"], list) and len(params["size"]) == 1
size = params["size"][0]
assert min_size <= size < max_size
def test__transform(self, mocker):
interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode)
antialias_sentinel = mocker.MagicMock()
transform = transforms.RandomResize(
min_size=-1, max_size=-1, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
transform._transformed_types = (mocker.MagicMock,)
size_sentinel = mocker.MagicMock()
mocker.patch(
"torchvision.transforms.v2._geometry.RandomResize._get_params",
return_value=dict(size=size_sentinel),
)
inpt_sentinel = mocker.MagicMock()
mock_resize = mocker.patch("torchvision.transforms.v2._geometry.F.resize")
transform(inpt_sentinel)
mock_resize.assert_called_with(
inpt_sentinel, size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
class TestToDtype:
@pytest.mark.parametrize(
("dtype", "expected_dtypes"),
[
(
torch.float64,
{
datapoints.Video: torch.float64,
datapoints.Image: torch.float64,
datapoints.BoundingBox: torch.float64,
},
),
(
{datapoints.Video: torch.int32, datapoints.Image: torch.float32, datapoints.BoundingBox: torch.float64},
{datapoints.Video: torch.int32, datapoints.Image: torch.float32, datapoints.BoundingBox: torch.float64},
),
],
)
def test_call(self, dtype, expected_dtypes):
sample = dict(
video=make_video(dtype=torch.int64),
image=make_image(dtype=torch.uint8),
bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, dtype=torch.float32),
str="str",
int=0,
)
transform = transforms.ToDtype(dtype)
transformed_sample = transform(sample)
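# tensors must be converted to the requested dtype, while non-tensor values (str, int) are passed through untouched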
for key, value in sample.items():
value_type = type(value)
transformed_value = transformed_sample[key]
# make sure the transformation retains the type
assert isinstance(transformed_value, value_type)
if isinstance(value, torch.Tensor):
assert transformed_value.dtype is expected_dtypes[value_type]
else:
assert transformed_value is value
@pytest.mark.filterwarnings("error")
def test_plain_tensor_call(self):
tensor = torch.empty((), dtype=torch.float32)
transform = transforms.ToDtype({torch.Tensor: torch.float64})
assert transform(tensor).dtype is torch.float64
@pytest.mark.parametrize("other_type", [datapoints.Image, datapoints.Video])
def test_plain_tensor_warning(self, other_type):
with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")):
transforms.ToDtype(dtype={torch.Tensor: torch.float32, other_type: torch.float64})
class TestPermuteDimensions:
@pytest.mark.parametrize(
("dims", "inverse_dims"),
[
(
{datapoints.Image: (2, 1, 0), datapoints.Video: None},
{datapoints.Image: (2, 1, 0), datapoints.Video: None},
{Image: (2, 1, 0), Video: None},
{Image: (2, 1, 0), Video: None},
),
(
{datapoints.Image: (2, 1, 0), datapoints.Video: (1, 2, 3, 0)},
{datapoints.Image: (2, 1, 0), datapoints.Video: (3, 0, 1, 2)},
{Image: (2, 1, 0), Video: (1, 2, 3, 0)},
{Image: (2, 1, 0), Video: (3, 0, 1, 2)},
),
],
)
def test_call(self, dims, inverse_dims):
sample = dict(
image=make_image(),
bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY),
bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY),
video=make_video(),
str="str",
int=0,
)
transform = proto_transforms.PermuteDimensions(dims)
transform = transforms.PermuteDimensions(dims)
transformed_sample = transform(sample)
for key, value in sample.items():
value_type = type(value)
transformed_value = transformed_sample[key]
if check_type(
value, (datapoints.Image, torchvision.transforms.v2.utils.is_simple_tensor, datapoints.Video)
):
if check_type(value, (Image, is_simple_tensor, Video)):
if transform.dims.get(value_type) is not None:
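# applying the inverse permutation should recover the original value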
assert transformed_value.permute(inverse_dims[value_type]).equal(value)
assert type(transformed_value) == torch.Tensor
......@@ -2068,14 +413,14 @@ class TestPermuteDimensions:
@pytest.mark.filterwarnings("error")
def test_plain_tensor_call(self):
tensor = torch.empty((2, 3, 4))
transform = proto_transforms.PermuteDimensions(dims=(1, 2, 0))
transform = transforms.PermuteDimensions(dims=(1, 2, 0))
assert transform(tensor).shape == (3, 4, 2)
@pytest.mark.parametrize("other_type", [datapoints.Image, datapoints.Video])
@pytest.mark.parametrize("other_type", [Image, Video])
def test_plain_tensor_warning(self, other_type):
with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")):
proto_transforms.PermuteDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)})
transforms.PermuteDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)})
class TestTransposeDimensions:
......@@ -2083,19 +428,19 @@ class TestTransposeDimensions:
"dims",
[
(-1, -2),
{datapoints.Image: (1, 2), datapoints.Video: None},
{Image: (1, 2), Video: None},
],
)
def test_call(self, dims):
sample = dict(
image=make_image(),
bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY),
bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY),
video=make_video(),
str="str",
int=0,
)
transform = proto_transforms.TransposeDimensions(dims)
transform = transforms.TransposeDimensions(dims)
transformed_sample = transform(sample)
for key, value in sample.items():
......@@ -2103,9 +448,7 @@ class TestTransposeDimensions:
transformed_value = transformed_sample[key]
transposed_dims = transform.dims.get(value_type)
if check_type(
value, (datapoints.Image, torchvision.transforms.v2.utils.is_simple_tensor, datapoints.Video)
):
if check_type(value, (Image, is_simple_tensor, Video)):
if transposed_dims is not None:
assert transformed_value.transpose(*transposed_dims).equal(value)
assert type(transformed_value) == torch.Tensor
......@@ -2115,372 +458,78 @@ class TestTransposeDimensions:
@pytest.mark.filterwarnings("error")
def test_plain_tensor_call(self):
tensor = torch.empty((2, 3, 4))
transform = proto_transforms.TransposeDimensions(dims=(0, 2))
transform = transforms.TransposeDimensions(dims=(0, 2))
assert transform(tensor).shape == (4, 3, 2)
@pytest.mark.parametrize("other_type", [datapoints.Image, datapoints.Video])
@pytest.mark.parametrize("other_type", [Image, Video])
def test_plain_tensor_warning(self, other_type):
with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")):
proto_transforms.TransposeDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)})
class TestUniformTemporalSubsample:
@pytest.mark.parametrize(
"inpt",
[
torch.zeros(10, 3, 8, 8),
torch.zeros(1, 10, 3, 8, 8),
datapoints.Video(torch.zeros(1, 10, 3, 8, 8)),
],
)
def test__transform(self, inpt):
num_samples = 5
transform = transforms.UniformTemporalSubsample(num_samples)
output = transform(inpt)
assert type(output) is type(inpt)
assert output.shape[-4] == num_samples
assert output.dtype == inpt.dtype
# TODO: remove this test in 0.17 when the default of antialias changes to True
def test_antialias_warning():
pil_img = PIL.Image.new("RGB", size=(10, 10), color=127)
tensor_img = torch.randint(0, 256, size=(3, 10, 10), dtype=torch.uint8)
tensor_video = torch.randint(0, 256, size=(2, 3, 10, 10), dtype=torch.uint8)
match = "The default value of the antialias parameter"
with pytest.warns(UserWarning, match=match):
transforms.Resize((20, 20))(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.RandomResizedCrop((20, 20))(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.ScaleJitter((20, 20))(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.RandomShortestSize((20, 20))(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.RandomResize(10, 20)(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.functional.resize(tensor_img, (20, 20))
with pytest.warns(UserWarning, match=match):
transforms.functional.resize_image_tensor(tensor_img, (20, 20))
with pytest.warns(UserWarning, match=match):
transforms.functional.resize(tensor_video, (20, 20))
with pytest.warns(UserWarning, match=match):
transforms.functional.resize_video(tensor_video, (20, 20))
with pytest.warns(UserWarning, match=match):
datapoints.Image(tensor_img).resize((20, 20))
with pytest.warns(UserWarning, match=match):
datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20))
with pytest.warns(UserWarning, match=match):
datapoints.Video(tensor_video).resize((20, 20))
with pytest.warns(UserWarning, match=match):
datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20))
with warnings.catch_warnings():
warnings.simplefilter("error")
transforms.Resize((20, 20))(pil_img)
transforms.RandomResizedCrop((20, 20))(pil_img)
transforms.ScaleJitter((20, 20))(pil_img)
transforms.RandomShortestSize((20, 20))(pil_img)
transforms.RandomResize(10, 20)(pil_img)
transforms.functional.resize(pil_img, (20, 20))
transforms.Resize((20, 20), antialias=True)(tensor_img)
transforms.RandomResizedCrop((20, 20), antialias=True)(tensor_img)
transforms.ScaleJitter((20, 20), antialias=True)(tensor_img)
transforms.RandomShortestSize((20, 20), antialias=True)(tensor_img)
transforms.RandomResize(10, 20, antialias=True)(tensor_img)
transforms.functional.resize(tensor_img, (20, 20), antialias=True)
transforms.functional.resize_image_tensor(tensor_img, (20, 20), antialias=True)
transforms.functional.resize(tensor_video, (20, 20), antialias=True)
transforms.functional.resize_video(tensor_video, (20, 20), antialias=True)
datapoints.Image(tensor_img).resize((20, 20), antialias=True)
datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
datapoints.Video(tensor_video).resize((20, 20), antialias=True)
datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, datapoints.Image))
@pytest.mark.parametrize("label_type", (torch.Tensor, int))
@pytest.mark.parametrize("dataset_return_type", (dict, tuple))
@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImageTensor))
def test_classif_preset(image_type, label_type, dataset_return_type, to_tensor):
image = datapoints.Image(torch.randint(0, 256, size=(1, 3, 250, 250), dtype=torch.uint8))
if image_type is PIL.Image:
image = to_pil_image(image[0])
elif image_type is torch.Tensor:
image = image.as_subclass(torch.Tensor)
assert is_simple_tensor(image)
label = 1 if label_type is int else torch.tensor([1])
if dataset_return_type is dict:
sample = {
"image": image,
"label": label,
}
else:
sample = image, label
transforms.TransposeDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)})
t = transforms.Compose(
[
transforms.RandomResizedCrop((224, 224)),
transforms.RandomHorizontalFlip(p=1),
transforms.RandAugment(),
transforms.TrivialAugmentWide(),
transforms.AugMix(),
transforms.AutoAugment(),
to_tensor(),
# TODO: ConvertImageDtype is a pass-through on PIL images, is that
# intended? This results in a failure if we convert to tensor after
# it, because the image would still be uint8, which makes Normalize
# fail.
transforms.ConvertImageDtype(torch.float),
transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1]),
transforms.RandomErasing(p=1),
]
)
out = t(sample)
import importlib.machinery
import importlib.util
from pathlib import Path
assert type(out) == type(sample)
if dataset_return_type is tuple:
out_image, out_label = out
else:
assert out.keys() == sample.keys()
out_image, out_label = out.values()
def import_transforms_from_references(reference):
HERE = Path(__file__).parent
PROJECT_ROOT = HERE.parent
assert out_image.shape[-2:] == (224, 224)
assert out_label == label
@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, datapoints.Image))
@pytest.mark.parametrize("data_augmentation", ("hflip", "lsj", "multiscale", "ssd", "ssdlite"))
@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImageTensor))
@pytest.mark.parametrize("sanitize", (True, False))
def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize):
torch.manual_seed(0)
if data_augmentation == "hflip":
t = [
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
elif data_augmentation == "lsj":
t = [
transforms.ScaleJitter(target_size=(1024, 1024), antialias=True),
# Note: replaced FixedSizeCrop with RandomCrop, because we're
# leaving FixedSizeCrop in prototype for now, and it expects Label
# classes which we won't release yet.
# transforms.FixedSizeCrop(
# size=(1024, 1024), fill=defaultdict(lambda: (123.0, 117.0, 104.0), {datapoints.Mask: 0})
# ),
transforms.RandomCrop((1024, 1024), pad_if_needed=True),
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
elif data_augmentation == "multiscale":
t = [
transforms.RandomShortestSize(
min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333, antialias=True
),
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
elif data_augmentation == "ssd":
t = [
transforms.RandomPhotometricDistort(p=1),
transforms.RandomZoomOut(fill=defaultdict(lambda: (123.0, 117.0, 104.0), {datapoints.Mask: 0})),
transforms.RandomIoUCrop(),
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
elif data_augmentation == "ssdlite":
t = [
transforms.RandomIoUCrop(),
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
if sanitize:
t += [transforms.SanitizeBoundingBoxes()]
t = transforms.Compose(t)
num_boxes = 5
H = W = 250
image = datapoints.Image(torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8))
if image_type is PIL.Image:
image = to_pil_image(image[0])
elif image_type is torch.Tensor:
image = image.as_subclass(torch.Tensor)
assert is_simple_tensor(image)
label = torch.randint(0, 10, size=(num_boxes,))
boxes = torch.randint(0, min(H, W) // 2, size=(num_boxes, 4))
boxes[:, 2:] += boxes[:, :2]
boxes = boxes.clamp(min=0, max=min(H, W))
boxes = datapoints.BoundingBox(boxes, format="XYXY", spatial_size=(H, W))
masks = datapoints.Mask(torch.randint(0, 2, size=(num_boxes, H, W), dtype=torch.uint8))
sample = {
"image": image,
"label": label,
"boxes": boxes,
"masks": masks,
}
out = t(sample)
if to_tensor is transforms.ToTensor and image_type is not datapoints.Image:
assert is_simple_tensor(out["image"])
else:
assert isinstance(out["image"], datapoints.Image)
assert isinstance(out["label"], type(sample["label"]))
num_boxes_expected = {
# ssd and ssdlite contain RandomIoUCrop which may "remove" some bboxes. Strictly
# speaking it doesn't remove them, it just marks some boxes as degenerate, and
# those boxes are later removed by SanitizeBoundingBoxes(), which we add to the
# pipelines if the sanitize param is True.
# Note that the values below are probably specific to the random seed
# set above (which is fine).
(True, "ssd"): 4,
(True, "ssdlite"): 4,
}.get((sanitize, data_augmentation), num_boxes)
assert out["boxes"].shape[0] == out["masks"].shape[0] == out["label"].shape[0] == num_boxes_expected
@pytest.mark.parametrize("min_size", (1, 10))
@pytest.mark.parametrize(
"labels_getter", ("default", "labels", lambda inputs: inputs["labels"], None, lambda inputs: None)
)
def test_sanitize_bounding_boxes(min_size, labels_getter):
H, W = 256, 128
boxes_and_validity = [
([0, 1, 10, 1], False), # Y1 == Y2
([0, 1, 0, 20], False), # X1 == X2
([0, 0, min_size - 1, 10], False), # W < min_size
([0, 0, 10, min_size - 1], False), # H < min_size
([0, 0, 10, H + 1], False), # Y2 > H
([0, 0, W + 1, 10], False), # X2 > W
([-1, 1, 10, 20], False), # any < 0
([0, 0, -1, 20], False), # any < 0
([0, 0, -10, -1], False), # any < 0
([0, 0, min_size, 10], True), # W == min_size
([0, 0, 10, min_size], True), # H == min_size
([0, 0, W, H], True), # TODO: Is that actually OK?? Should it be -1?
([1, 1, 30, 20], True),
([0, 0, 10, 10], True),
([1, 1, 30, 20], True),
]
random.shuffle(boxes_and_validity) # For test robustness: mix order of wrong and correct cases
boxes, is_valid_mask = zip(*boxes_and_validity)
valid_indices = [i for (i, is_valid) in enumerate(is_valid_mask) if is_valid]
boxes = torch.tensor(boxes)
labels = torch.arange(boxes.shape[0])
boxes = datapoints.BoundingBox(
boxes,
format=datapoints.BoundingBoxFormat.XYXY,
spatial_size=(H, W),
loader = importlib.machinery.SourceFileLoader(
"transforms", str(PROJECT_ROOT / "references" / reference / "transforms.py")
)
spec = importlib.util.spec_from_loader("transforms", loader)
module = importlib.util.module_from_spec(spec)
loader.exec_module(module)
return module
masks = datapoints.Mask(torch.randint(0, 2, size=(boxes.shape[0], H, W)))
sample = {
"image": torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8),
"labels": labels,
"boxes": boxes,
"whatever": torch.rand(10),
"None": None,
"masks": masks,
}
out = transforms.SanitizeBoundingBoxes(min_size=min_size, labels_getter=labels_getter)(sample)
det_transforms = import_transforms_from_references("detection")
assert out["image"] is sample["image"]
assert out["whatever"] is sample["whatever"]
if labels_getter is None or (callable(labels_getter) and labels_getter({"labels": "blah"}) is None):
assert out["labels"] is sample["labels"]
else:
assert isinstance(out["labels"], torch.Tensor)
assert out["boxes"].shape[0] == out["labels"].shape[0] == out["masks"].shape[0]
# This works because we conveniently set labels to arange(num_boxes)
assert out["labels"].tolist() == valid_indices
def test_fixed_sized_crop_against_detection_reference():
def make_datapoints():
size = (600, 800)
num_objects = 22
pil_image = to_image_pil(make_image(size=size, color_space="RGB"))
target = {
"boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
"labels": make_label(extra_dims=(num_objects,), categories=80),
"masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
}
@pytest.mark.parametrize("key", ("labels", "LABELS", "LaBeL", "SOME_WEIRD_KEY_THAT_HAS_LABeL_IN_IT"))
def test_sanitize_bounding_boxes_default_heuristic(key):
labels = torch.arange(10)
d = {key: labels}
assert transforms.SanitizeBoundingBoxes._find_labels_default_heuristic(d) is labels
if key.lower() != "labels":
# If "labels" is in the dict (case-insensitive),
# it takes precedence over other keys which would otherwise be a match
d = {key: "something_else", "labels": labels}
assert transforms.SanitizeBoundingBoxes._find_labels_default_heuristic(d) is labels
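# A minimal sketch, using a hypothetical helper, of a heuristic consistent with the assertions above
# (not the library's actual implementation): an exact, case-insensitive "labels" key takes precedence,
# otherwise any key that merely contains "label" is accepted.
def _example_find_labels(d):
    for key, value in d.items():
        if key.lower() == "labels":
            return value
    for key, value in d.items():
        if "label" in key.lower():
            return value
    return None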
yield (pil_image, target)
def test_sanitize_bounding_boxes_errors():
tensor_image = torch.Tensor(make_image(size=size, color_space="RGB"))
target = {
"boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
"labels": make_label(extra_dims=(num_objects,), categories=80),
"masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
}
good_bbox = datapoints.BoundingBox(
[[0, 0, 10, 10]],
format=datapoints.BoundingBoxFormat.XYXY,
spatial_size=(20, 20),
)
yield (tensor_image, target)
with pytest.raises(ValueError, match="min_size must be >= 1"):
transforms.SanitizeBoundingBoxes(min_size=0)
with pytest.raises(ValueError, match="labels_getter should either be a str"):
transforms.SanitizeBoundingBoxes(labels_getter=12)
datapoint_image = make_image(size=size, color_space="RGB")
target = {
"boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
"labels": make_label(extra_dims=(num_objects,), categories=80),
"masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
}
with pytest.raises(ValueError, match="Could not infer where the labels are"):
bad_labels_key = {"bbox": good_bbox, "BAD_KEY": torch.arange(good_bbox.shape[0])}
transforms.SanitizeBoundingBoxes()(bad_labels_key)
yield (datapoint_image, target)
with pytest.raises(ValueError, match="If labels_getter is a str or 'default'"):
not_a_dict = (good_bbox, torch.arange(good_bbox.shape[0]))
transforms.SanitizeBoundingBoxes()(not_a_dict)
t = transforms.FixedSizeCrop((1024, 1024), fill=0)
t_ref = det_transforms.FixedSizeCrop((1024, 1024), fill=0)
with pytest.raises(ValueError, match="must be a tensor"):
not_a_tensor = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0]).tolist()}
transforms.SanitizeBoundingBoxes()(not_a_tensor)
for dp in make_datapoints():
# We should run the prototype transform first, as the reference transform performs an in-place target update
torch.manual_seed(12)
output = t(dp)
with pytest.raises(ValueError, match="Number of boxes"):
different_sizes = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0] + 3)}
transforms.SanitizeBoundingBoxes()(different_sizes)
torch.manual_seed(12)
expected_output = t_ref(*dp)
with pytest.raises(ValueError, match="boxes must be of shape"):
bad_bbox = datapoints.BoundingBox( # batch with 2 elements
[
[[0, 0, 10, 10]],
[[0, 0, 10, 10]],
],
format=datapoints.BoundingBoxFormat.XYXY,
spatial_size=(20, 20),
)
different_sizes = {"bbox": bad_bbox, "labels": torch.arange(bad_bbox.shape[0])}
transforms.SanitizeBoundingBoxes()(different_sizes)
assert_equal(expected_output, output)
import itertools
import pathlib
import random
import re
import warnings
from collections import defaultdict
import numpy as np
import PIL.Image
import pytest
import torch
import torchvision.transforms.v2 as transforms
from common_utils import (
assert_equal,
cpu_and_gpu,
make_bounding_box,
make_bounding_boxes,
make_detection_mask,
make_image,
make_images,
make_segmentation_mask,
make_video,
make_videos,
)
from torch.utils._pytree import tree_flatten, tree_unflatten
from torchvision import datapoints
from torchvision.ops.boxes import box_iou
from torchvision.transforms.functional import InterpolationMode, pil_to_tensor, to_pil_image
from torchvision.transforms.v2 import functional as F
from torchvision.transforms.v2.utils import check_type, is_simple_tensor, query_chw
def make_vanilla_tensor_images(*args, **kwargs):
for image in make_images(*args, **kwargs):
if image.ndim > 3:
continue
yield image.data
def make_pil_images(*args, **kwargs):
for image in make_vanilla_tensor_images(*args, **kwargs):
yield to_pil_image(image)
def make_vanilla_tensor_bounding_boxes(*args, **kwargs):
for bounding_box in make_bounding_boxes(*args, **kwargs):
yield bounding_box.data
def parametrize(transforms_with_inputs):
return pytest.mark.parametrize(
("transform", "input"),
[
pytest.param(
transform,
input,
id=f"{type(transform).__name__}-{type(input).__module__}.{type(input).__name__}-{idx}",
)
for transform, inputs in transforms_with_inputs
for idx, input in enumerate(inputs)
],
)
def auto_augment_adapter(transform, input, device):
adapted_input = {}
image_or_video_found = False
for key, value in input.items():
if isinstance(value, (datapoints.BoundingBox, datapoints.Mask)):
# AA transforms don't support bounding boxes or masks
continue
elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor, PIL.Image.Image)):
if image_or_video_found:
# AA transforms only support a single image or video
continue
image_or_video_found = True
adapted_input[key] = value
return adapted_input
def linear_transformation_adapter(transform, input, device):
flat_inputs = list(input.values())
c, h, w = query_chw(
[
item
for item, needs_transform in zip(flat_inputs, transforms.Transform()._needs_transform_list(flat_inputs))
if needs_transform
]
)
num_elements = c * h * w
transform.transformation_matrix = torch.randn((num_elements, num_elements), device=device)
transform.mean_vector = torch.randn((num_elements,), device=device)
return {key: value for key, value in input.items() if not isinstance(value, PIL.Image.Image)}
def normalize_adapter(transform, input, device):
adapted_input = {}
for key, value in input.items():
if isinstance(value, PIL.Image.Image):
# normalize doesn't support PIL images
continue
elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor)):
# normalize doesn't support integer images
value = F.convert_dtype(value, torch.float32)
adapted_input[key] = value
return adapted_input
class TestSmoke:
@pytest.mark.parametrize(
("transform", "adapter"),
[
(transforms.RandomErasing(p=1.0), None),
(transforms.AugMix(), auto_augment_adapter),
(transforms.AutoAugment(), auto_augment_adapter),
(transforms.RandAugment(), auto_augment_adapter),
(transforms.TrivialAugmentWide(), auto_augment_adapter),
(transforms.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.3, hue=0.15), None),
(transforms.Grayscale(), None),
(transforms.RandomAdjustSharpness(sharpness_factor=0.5, p=1.0), None),
(transforms.RandomAutocontrast(p=1.0), None),
(transforms.RandomEqualize(p=1.0), None),
(transforms.RandomGrayscale(p=1.0), None),
(transforms.RandomInvert(p=1.0), None),
(transforms.RandomPhotometricDistort(p=1.0), None),
(transforms.RandomPosterize(bits=4, p=1.0), None),
(transforms.RandomSolarize(threshold=0.5, p=1.0), None),
(transforms.CenterCrop([16, 16]), None),
(transforms.ElasticTransform(sigma=1.0), None),
(transforms.Pad(4), None),
(transforms.RandomAffine(degrees=30.0), None),
(transforms.RandomCrop([16, 16], pad_if_needed=True), None),
(transforms.RandomHorizontalFlip(p=1.0), None),
(transforms.RandomPerspective(p=1.0), None),
(transforms.RandomResize(min_size=10, max_size=20), None),
(transforms.RandomResizedCrop([16, 16]), None),
(transforms.RandomRotation(degrees=30), None),
(transforms.RandomShortestSize(min_size=10), None),
(transforms.RandomVerticalFlip(p=1.0), None),
(transforms.RandomZoomOut(p=1.0), None),
(transforms.Resize([16, 16], antialias=True), None),
(transforms.ScaleJitter((16, 16), scale_range=(0.8, 1.2)), None),
(transforms.ClampBoundingBox(), None),
(transforms.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.CXCYWH), None),
(transforms.ConvertDtype(), None),
(transforms.GaussianBlur(kernel_size=3), None),
(
transforms.LinearTransformation(
# These are just dummy values that will be filled by the adapter. We can't define them upfront,
# because at this point we know neither the spatial size nor the device
transformation_matrix=torch.empty((1, 1)),
mean_vector=torch.empty((1,)),
),
linear_transformation_adapter,
),
(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), normalize_adapter),
(transforms.ToDtype(torch.float64), None),
(transforms.UniformTemporalSubsample(num_samples=2), None),
],
ids=lambda transform: type(transform).__name__,
)
@pytest.mark.parametrize("container_type", [dict, list, tuple])
@pytest.mark.parametrize(
"image_or_video",
[
make_image(),
make_video(),
next(make_pil_images(color_spaces=["RGB"])),
next(make_vanilla_tensor_images()),
],
)
@pytest.mark.parametrize("device", cpu_and_gpu())
def test_common(self, transform, adapter, container_type, image_or_video, device):
spatial_size = F.get_spatial_size(image_or_video)
input = dict(
image_or_video=image_or_video,
image_datapoint=make_image(size=spatial_size),
video_datapoint=make_video(size=spatial_size),
image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])),
bounding_box_xyxy=make_bounding_box(
format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(3,)
),
bounding_box_xywh=make_bounding_box(
format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, extra_dims=(4,)
),
bounding_box_cxcywh=make_bounding_box(
format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, extra_dims=(5,)
),
bounding_box_degenerate_xyxy=datapoints.BoundingBox(
[
[0, 0, 0, 0], # no height or width
[0, 0, 0, 1], # no height
[0, 0, 1, 0], # no width
[2, 0, 1, 1], # x1 > x2, y1 < y2
[0, 2, 1, 1], # x1 < x2, y1 > y2
[2, 2, 1, 1], # x1 > x2, y1 > y2
],
format=datapoints.BoundingBoxFormat.XYXY,
spatial_size=spatial_size,
),
bounding_box_degenerate_xywh=datapoints.BoundingBox(
[
[0, 0, 0, 0], # no height or width
[0, 0, 0, 1], # no height
[0, 0, 1, 0], # no width
[0, 0, 1, -1], # negative height
[0, 0, -1, 1], # negative width
[0, 0, -1, -1], # negative height and width
],
format=datapoints.BoundingBoxFormat.XYWH,
spatial_size=spatial_size,
),
bounding_box_degenerate_cxcywh=datapoints.BoundingBox(
[
[0, 0, 0, 0], # no height or width
[0, 0, 0, 1], # no height
[0, 0, 1, 0], # no width
[0, 0, 1, -1], # negative height
[0, 0, -1, 1], # negative width
[0, 0, -1, -1], # negative height and width
],
format=datapoints.BoundingBoxFormat.CXCYWH,
spatial_size=spatial_size,
),
detection_mask=make_detection_mask(size=spatial_size),
segmentation_mask=make_segmentation_mask(size=spatial_size),
int=0,
float=0.0,
bool=True,
none=None,
str="str",
path=pathlib.Path.cwd(),
object=object(),
tensor=torch.empty(5),
array=np.empty(5),
)
if adapter is not None:
input = adapter(transform, input, device)
if container_type in {tuple, list}:
input = container_type(input.values())
input_flat, input_spec = tree_flatten(input)
input_flat = [item.to(device) if isinstance(item, torch.Tensor) else item for item in input_flat]
input = tree_unflatten(input_flat, input_spec)
torch.manual_seed(0)
output = transform(input)
output_flat, output_spec = tree_flatten(output)
assert output_spec == input_spec
for output_item, input_item, should_be_transformed in zip(
output_flat, input_flat, transforms.Transform()._needs_transform_list(input_flat)
):
if should_be_transformed:
assert type(output_item) is type(input_item)
else:
assert output_item is input_item
if isinstance(input_item, datapoints.BoundingBox) and not isinstance(
transform, transforms.ConvertBoundingBoxFormat
):
assert output_item.format == input_item.format
# Enforce that the transform does not turn a degenerate box marked by RandomIoUCrop (or any other future
# transform that does this) back into a valid one.
# TODO: we should test that against all degenerate boxes above
for format in list(datapoints.BoundingBoxFormat):
sample = dict(
boxes=datapoints.BoundingBox([[0, 0, 0, 0]], format=format, spatial_size=(224, 244)),
labels=torch.tensor([3]),
)
assert transforms.SanitizeBoundingBoxes()(sample)["boxes"].shape == (0, 4)
@parametrize(
[
(
transform,
itertools.chain.from_iterable(
fn(
color_spaces=[
"GRAY",
"RGB",
],
dtypes=[torch.uint8],
extra_dims=[(), (4,)],
**(dict(num_frames=["random"]) if fn is make_videos else dict()),
)
for fn in [
make_images,
make_vanilla_tensor_images,
make_pil_images,
make_videos,
]
),
)
for transform in (
transforms.RandAugment(),
transforms.TrivialAugmentWide(),
transforms.AutoAugment(),
transforms.AugMix(),
)
]
)
def test_auto_augment(self, transform, input):
transform(input)
@parametrize(
[
(
transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]),
itertools.chain.from_iterable(
fn(color_spaces=["RGB"], dtypes=[torch.float32])
for fn in [
make_images,
make_vanilla_tensor_images,
make_videos,
]
),
),
]
)
def test_normalize(self, transform, input):
transform(input)
@parametrize(
[
(
transforms.RandomResizedCrop([16, 16], antialias=True),
itertools.chain(
make_images(extra_dims=[(4,)]),
make_vanilla_tensor_images(),
make_pil_images(),
make_videos(extra_dims=[()]),
),
)
]
)
def test_random_resized_crop(self, transform, input):
transform(input)
@pytest.mark.parametrize(
"flat_inputs",
itertools.permutations(
[
next(make_vanilla_tensor_images()),
next(make_vanilla_tensor_images()),
next(make_pil_images()),
make_image(),
next(make_videos()),
],
3,
),
)
def test_simple_tensor_heuristic(flat_inputs):
def split_on_simple_tensor(to_split):
# This takes a sequence that is structurally aligned with `flat_inputs` and splits its items into three parts:
# 1. The first simple tensor. If none is present, this will be `None`
# 2. A list of the remaining simple tensors
# 3. A list of all other items
simple_tensors = []
others = []
# Splitting always happens on the original `flat_inputs`, so that any erroneous type changes by the transform
# cannot affect the splitting.
for item, inpt in zip(to_split, flat_inputs):
(simple_tensors if is_simple_tensor(inpt) else others).append(item)
return simple_tensors[0] if simple_tensors else None, simple_tensors[1:], others
class CopyCloneTransform(transforms.Transform):
def _transform(self, inpt, params):
return inpt.clone() if isinstance(inpt, torch.Tensor) else inpt.copy()
@staticmethod
def was_applied(output, inpt):
identity = output is inpt
if identity:
return False
# Make sure nothing fishy is going on
assert_equal(output, inpt)
return True
first_simple_tensor_input, other_simple_tensor_inputs, other_inputs = split_on_simple_tensor(flat_inputs)
transform = CopyCloneTransform()
transformed_sample = transform(flat_inputs)
first_simple_tensor_output, other_simple_tensor_outputs, other_outputs = split_on_simple_tensor(transformed_sample)
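# Heuristic checked below: the first plain tensor is only transformed when no other image/video-like input
# is present; all remaining plain tensors are passed through untouched.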
if first_simple_tensor_input is not None:
if other_inputs:
assert not transform.was_applied(first_simple_tensor_output, first_simple_tensor_input)
else:
assert transform.was_applied(first_simple_tensor_output, first_simple_tensor_input)
for output, inpt in zip(other_simple_tensor_outputs, other_simple_tensor_inputs):
assert not transform.was_applied(output, inpt)
for input, output in zip(other_inputs, other_outputs):
assert transform.was_applied(output, input)
@pytest.mark.parametrize("p", [0.0, 1.0])
class TestRandomHorizontalFlip:
def input_expected_image_tensor(self, p, dtype=torch.float32):
input = torch.tensor([[[0, 1], [0, 1]], [[1, 0], [1, 0]]], dtype=dtype)
expected = torch.tensor([[[1, 0], [1, 0]], [[0, 1], [0, 1]]], dtype=dtype)
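# with p == 0 the flip is never applied, so the expected output is just the unchanged input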
return input, expected if p == 1 else input
def test_simple_tensor(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(input)
assert_equal(expected, actual)
def test_pil_image(self, p):
input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(to_pil_image(input))
assert_equal(expected, pil_to_tensor(actual))
def test_datapoints_image(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(datapoints.Image(input))
assert_equal(datapoints.Image(expected), actual)
def test_datapoints_mask(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(datapoints.Mask(input))
assert_equal(datapoints.Mask(expected), actual)
def test_datapoints_bounding_box(self, p):
input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10))
transform = transforms.RandomHorizontalFlip(p=p)
actual = transform(input)
expected_image_tensor = torch.tensor([5, 0, 10, 5]) if p == 1.0 else input
expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor)
assert_equal(expected, actual)
assert actual.format == expected.format
assert actual.spatial_size == expected.spatial_size
@pytest.mark.parametrize("p", [0.0, 1.0])
class TestRandomVerticalFlip:
def input_expected_image_tensor(self, p, dtype=torch.float32):
input = torch.tensor([[[1, 1], [0, 0]], [[1, 1], [0, 0]]], dtype=dtype)
expected = torch.tensor([[[0, 0], [1, 1]], [[0, 0], [1, 1]]], dtype=dtype)
return input, expected if p == 1 else input
def test_simple_tensor(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(input)
assert_equal(expected, actual)
def test_pil_image(self, p):
input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(to_pil_image(input))
assert_equal(expected, pil_to_tensor(actual))
def test_datapoints_image(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(datapoints.Image(input))
assert_equal(datapoints.Image(expected), actual)
def test_datapoints_mask(self, p):
input, expected = self.input_expected_image_tensor(p)
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(datapoints.Mask(input))
assert_equal(datapoints.Mask(expected), actual)
def test_datapoints_bounding_box(self, p):
input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10))
transform = transforms.RandomVerticalFlip(p=p)
actual = transform(input)
expected_image_tensor = torch.tensor([0, 5, 5, 10]) if p == 1.0 else input
expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor)
assert_equal(expected, actual)
assert actual.format == expected.format
assert actual.spatial_size == expected.spatial_size
class TestPad:
def test_assertions(self):
with pytest.raises(TypeError, match="Got inappropriate padding arg"):
transforms.Pad("abc")
with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"):
transforms.Pad([-0.7, 0, 0.7])
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.Pad(12, fill="abc")
with pytest.raises(ValueError, match="Padding mode should be either"):
transforms.Pad(12, padding_mode="abc")
@pytest.mark.parametrize("padding", [1, (1, 2), [1, 2, 3, 4]])
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("padding_mode", ["constant", "edge"])
def test__transform(self, padding, fill, padding_mode, mocker):
transform = transforms.Pad(padding, fill=fill, padding_mode=padding_mode)
fn = mocker.patch("torchvision.transforms.v2.functional.pad")
inpt = mocker.MagicMock(spec=datapoints.Image)
_ = transform(inpt)
fill = transforms._utils._convert_fill_arg(fill)
if isinstance(padding, tuple):
padding = list(padding)
fn.assert_called_once_with(inpt, padding=padding, fill=fill, padding_mode=padding_mode)
@pytest.mark.parametrize("fill", [12, {datapoints.Image: 12, datapoints.Mask: 34}])
def test__transform_image_mask(self, fill, mocker):
transform = transforms.Pad(1, fill=fill, padding_mode="constant")
fn = mocker.patch("torchvision.transforms.v2.functional.pad")
image = datapoints.Image(torch.rand(3, 32, 32))
mask = datapoints.Mask(torch.randint(0, 5, size=(32, 32)))
inpt = [image, mask]
_ = transform(inpt)
if isinstance(fill, int):
fill = transforms._utils._convert_fill_arg(fill)
calls = [
mocker.call(image, padding=1, fill=fill, padding_mode="constant"),
mocker.call(mask, padding=1, fill=fill, padding_mode="constant"),
]
else:
fill_img = transforms._utils._convert_fill_arg(fill[type(image)])
fill_mask = transforms._utils._convert_fill_arg(fill[type(mask)])
calls = [
mocker.call(image, padding=1, fill=fill_img, padding_mode="constant"),
mocker.call(mask, padding=1, fill=fill_mask, padding_mode="constant"),
]
fn.assert_has_calls(calls)
class TestRandomZoomOut:
def test_assertions(self):
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomZoomOut(fill="abc")
with pytest.raises(TypeError, match="should be a sequence of length"):
transforms.RandomZoomOut(0, side_range=0)
with pytest.raises(ValueError, match="Invalid canvas side range"):
transforms.RandomZoomOut(0, side_range=[4.0, 1.0])
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]])
def test__get_params(self, fill, side_range, mocker):
transform = transforms.RandomZoomOut(fill=fill, side_range=side_range)
image = mocker.MagicMock(spec=datapoints.Image)
h, w = image.spatial_size = (24, 32)
params = transform._get_params([image])
assert len(params["padding"]) == 4
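# each padding entry is bounded by the maximum canvas growth, i.e. (side_range[1] - 1) times the corresponding image dimension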
assert 0 <= params["padding"][0] <= (side_range[1] - 1) * w
assert 0 <= params["padding"][1] <= (side_range[1] - 1) * h
assert 0 <= params["padding"][2] <= (side_range[1] - 1) * w
assert 0 <= params["padding"][3] <= (side_range[1] - 1) * h
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]])
def test__transform(self, fill, side_range, mocker):
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
transform = transforms.RandomZoomOut(fill=fill, side_range=side_range, p=1)
fn = mocker.patch("torchvision.transforms.v2.functional.pad")
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
torch.rand(1) # random apply changes random state
params = transform._get_params([inpt])
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, fill=fill)
@pytest.mark.parametrize("fill", [12, {datapoints.Image: 12, datapoints.Mask: 34}])
def test__transform_image_mask(self, fill, mocker):
transform = transforms.RandomZoomOut(fill=fill, p=1.0)
fn = mocker.patch("torchvision.transforms.v2.functional.pad")
image = datapoints.Image(torch.rand(3, 32, 32))
mask = datapoints.Mask(torch.randint(0, 5, size=(32, 32)))
inpt = [image, mask]
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
torch.rand(1) # random apply changes random state
params = transform._get_params(inpt)
if isinstance(fill, int):
fill = transforms._utils._convert_fill_arg(fill)
calls = [
mocker.call(image, **params, fill=fill),
mocker.call(mask, **params, fill=fill),
]
else:
fill_img = transforms._utils._convert_fill_arg(fill[type(image)])
fill_mask = transforms._utils._convert_fill_arg(fill[type(mask)])
calls = [
mocker.call(image, **params, fill=fill_img),
mocker.call(mask, **params, fill=fill_mask),
]
fn.assert_has_calls(calls)
class TestRandomRotation:
def test_assertions(self):
with pytest.raises(ValueError, match="is a single number, it must be positive"):
transforms.RandomRotation(-0.7)
for d in [[-0.7], [-0.7, 0, 0.7]]:
with pytest.raises(ValueError, match="degrees should be a sequence of length 2"):
transforms.RandomRotation(d)
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomRotation(12, fill="abc")
with pytest.raises(TypeError, match="center should be a sequence of length"):
transforms.RandomRotation(12, center=12)
with pytest.raises(ValueError, match="center should be a sequence of length"):
transforms.RandomRotation(12, center=[1, 2, 3])
def test__get_params(self):
angle_bound = 34
transform = transforms.RandomRotation(angle_bound)
params = transform._get_params(None)
assert -angle_bound <= params["angle"] <= angle_bound
angle_bounds = [12, 34]
transform = transforms.RandomRotation(angle_bounds)
params = transform._get_params(None)
assert angle_bounds[0] <= params["angle"] <= angle_bounds[1]
@pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
@pytest.mark.parametrize("expand", [False, True])
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("center", [None, [2.0, 3.0]])
def test__transform(self, degrees, expand, fill, center, mocker):
interpolation = InterpolationMode.BILINEAR
transform = transforms.RandomRotation(
degrees, interpolation=interpolation, expand=expand, fill=fill, center=center
)
if isinstance(degrees, (tuple, list)):
assert transform.degrees == [float(degrees[0]), float(degrees[1])]
else:
assert transform.degrees == [float(-degrees), float(degrees)]
fn = mocker.patch("torchvision.transforms.v2.functional.rotate")
inpt = mocker.MagicMock(spec=datapoints.Image)
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
params = transform._get_params(inpt)
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, interpolation=interpolation, expand=expand, fill=fill, center=center)
@pytest.mark.parametrize("angle", [34, -87])
@pytest.mark.parametrize("expand", [False, True])
def test_boundingbox_spatial_size(self, angle, expand):
# Specific test for BoundingBox.rotate
bbox = datapoints.BoundingBox(
torch.tensor([1, 2, 3, 4]), format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(32, 32)
)
img = datapoints.Image(torch.rand(1, 3, 32, 32))
out_img = img.rotate(angle, expand=expand)
out_bbox = bbox.rotate(angle, expand=expand)
assert out_img.spatial_size == out_bbox.spatial_size
class TestRandomAffine:
def test_assertions(self):
with pytest.raises(ValueError, match="is a single number, it must be positive"):
transforms.RandomAffine(-0.7)
for d in [[-0.7], [-0.7, 0, 0.7]]:
with pytest.raises(ValueError, match="degrees should be a sequence of length 2"):
transforms.RandomAffine(d)
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomAffine(12, fill="abc")
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomAffine(12, fill="abc")
for kwargs in [
{"center": 12},
{"translate": 12},
{"scale": 12},
]:
with pytest.raises(TypeError, match="should be a sequence of length"):
transforms.RandomAffine(12, **kwargs)
for kwargs in [{"center": [1, 2, 3]}, {"translate": [1, 2, 3]}, {"scale": [1, 2, 3]}]:
with pytest.raises(ValueError, match="should be a sequence of length"):
transforms.RandomAffine(12, **kwargs)
with pytest.raises(ValueError, match="translation values should be between 0 and 1"):
transforms.RandomAffine(12, translate=[-1.0, 2.0])
with pytest.raises(ValueError, match="scale values should be positive"):
transforms.RandomAffine(12, scale=[-1.0, 2.0])
with pytest.raises(ValueError, match="is a single number, it must be positive"):
transforms.RandomAffine(12, shear=-10)
for s in [[-0.7], [-0.7, 0, 0.7]]:
with pytest.raises(ValueError, match="shear should be a sequence of length 2"):
transforms.RandomAffine(12, shear=s)
@pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
@pytest.mark.parametrize("translate", [None, [0.1, 0.2]])
@pytest.mark.parametrize("scale", [None, [0.7, 1.2]])
@pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]])
def test__get_params(self, degrees, translate, scale, shear, mocker):
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
h, w = image.spatial_size
transform = transforms.RandomAffine(degrees, translate=translate, scale=scale, shear=shear)
params = transform._get_params([image])
if not isinstance(degrees, (list, tuple)):
assert -degrees <= params["angle"] <= degrees
else:
assert degrees[0] <= params["angle"] <= degrees[1]
if translate is not None:
w_max = int(round(translate[0] * w))
h_max = int(round(translate[1] * h))
assert -w_max <= params["translate"][0] <= w_max
assert -h_max <= params["translate"][1] <= h_max
else:
assert params["translate"] == (0, 0)
if scale is not None:
assert scale[0] <= params["scale"] <= scale[1]
else:
assert params["scale"] == 1.0
if shear is not None:
if isinstance(shear, float):
assert -shear <= params["shear"][0] <= shear
assert params["shear"][1] == 0.0
elif len(shear) == 2:
assert shear[0] <= params["shear"][0] <= shear[1]
assert params["shear"][1] == 0.0
else:
assert shear[0] <= params["shear"][0] <= shear[1]
assert shear[2] <= params["shear"][1] <= shear[3]
else:
assert params["shear"] == (0, 0)
@pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
@pytest.mark.parametrize("translate", [None, [0.1, 0.2]])
@pytest.mark.parametrize("scale", [None, [0.7, 1.2]])
@pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]])
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("center", [None, [2.0, 3.0]])
def test__transform(self, degrees, translate, scale, shear, fill, center, mocker):
interpolation = InterpolationMode.BILINEAR
transform = transforms.RandomAffine(
degrees,
translate=translate,
scale=scale,
shear=shear,
interpolation=interpolation,
fill=fill,
center=center,
)
if isinstance(degrees, (tuple, list)):
assert transform.degrees == [float(degrees[0]), float(degrees[1])]
else:
assert transform.degrees == [float(-degrees), float(degrees)]
fn = mocker.patch("torchvision.transforms.v2.functional.affine")
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
params = transform._get_params([inpt])
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, interpolation=interpolation, fill=fill, center=center)
class TestRandomCrop:
def test_assertions(self):
with pytest.raises(ValueError, match="Please provide only two dimensions"):
transforms.RandomCrop([10, 12, 14])
with pytest.raises(TypeError, match="Got inappropriate padding arg"):
transforms.RandomCrop([10, 12], padding="abc")
with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"):
transforms.RandomCrop([10, 12], padding=[-0.7, 0, 0.7])
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomCrop([10, 12], padding=1, fill="abc")
with pytest.raises(ValueError, match="Padding mode should be either"):
transforms.RandomCrop([10, 12], padding=1, padding_mode="abc")
@pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]])
@pytest.mark.parametrize("size, pad_if_needed", [((10, 10), False), ((50, 25), True)])
def test__get_params(self, padding, pad_if_needed, size, mocker):
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
h, w = image.spatial_size
transform = transforms.RandomCrop(size, padding=padding, pad_if_needed=pad_if_needed)
params = transform._get_params([image])
if padding is not None:
if isinstance(padding, int):
pad_top = pad_bottom = pad_left = pad_right = padding
elif isinstance(padding, list) and len(padding) == 2:
pad_left = pad_right = padding[0]
pad_top = pad_bottom = padding[1]
elif isinstance(padding, list) and len(padding) == 4:
pad_left, pad_top, pad_right, pad_bottom = padding
h += pad_top + pad_bottom
w += pad_left + pad_right
else:
pad_left = pad_right = pad_top = pad_bottom = 0
if pad_if_needed:
if w < size[1]:
diff = size[1] - w
pad_left += diff
pad_right += diff
w += 2 * diff
if h < size[0]:
diff = size[0] - h
pad_top += diff
pad_bottom += diff
h += 2 * diff
padding = [pad_left, pad_top, pad_right, pad_bottom]
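# With the expected padding folded in, the sampled crop origin must fit inside the (possibly padded)
# image, and the remaining params must echo the requested size and padding.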
assert 0 <= params["top"] <= h - size[0] + 1
assert 0 <= params["left"] <= w - size[1] + 1
assert params["height"] == size[0]
assert params["width"] == size[1]
assert params["needs_pad"] is any(padding)
assert params["padding"] == padding
@pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]])
@pytest.mark.parametrize("pad_if_needed", [False, True])
@pytest.mark.parametrize("fill", [False, True])
@pytest.mark.parametrize("padding_mode", ["constant", "edge"])
def test__transform(self, padding, pad_if_needed, fill, padding_mode, mocker):
output_size = [10, 12]
transform = transforms.RandomCrop(
output_size, padding=padding, pad_if_needed=pad_if_needed, fill=fill, padding_mode=padding_mode
)
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (32, 32)
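# `expected` only stands in for the return value of the mocked pad call below; its metadata just needs to be plausible.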
expected = mocker.MagicMock(spec=datapoints.Image)
expected.num_channels = 3
if isinstance(padding, int):
expected.spatial_size = (inpt.spatial_size[0] + padding, inpt.spatial_size[1] + padding)
elif isinstance(padding, list):
expected.spatial_size = (
inpt.spatial_size[0] + sum(padding[0::2]),
inpt.spatial_size[1] + sum(padding[1::2]),
)
else:
expected.spatial_size = inpt.spatial_size
_ = mocker.patch("torchvision.transforms.v2.functional.pad", return_value=expected)
fn_crop = mocker.patch("torchvision.transforms.v2.functional.crop")
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
params = transform._get_params([inpt])
if padding is None and not pad_if_needed:
fn_crop.assert_called_once_with(
inpt, top=params["top"], left=params["left"], height=output_size[0], width=output_size[1]
)
elif not pad_if_needed:
fn_crop.assert_called_once_with(
expected, top=params["top"], left=params["left"], height=output_size[0], width=output_size[1]
)
elif padding is None:
# vfdev-5: I do not know how to mock and test this case
pass
else:
# vfdev-5: I do not know how to mock and test this case
pass
class TestGaussianBlur:
def test_assertions(self):
with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"):
transforms.GaussianBlur([10, 12, 14])
with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"):
transforms.GaussianBlur(4)
with pytest.raises(
TypeError, match="sigma should be a single int or float or a list/tuple with length 2 floats."
):
transforms.GaussianBlur(3, sigma=[1, 2, 3])
with pytest.raises(ValueError, match="If sigma is a single number, it must be positive"):
transforms.GaussianBlur(3, sigma=-1.0)
with pytest.raises(ValueError, match="sigma values should be positive and of the form"):
transforms.GaussianBlur(3, sigma=[2.0, 1.0])
@pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0]])
def test__get_params(self, sigma):
transform = transforms.GaussianBlur(3, sigma=sigma)
params = transform._get_params([])
if isinstance(sigma, float):
assert params["sigma"][0] == params["sigma"][1] == 10
else:
assert sigma[0] <= params["sigma"][0] <= sigma[1]
assert sigma[0] <= params["sigma"][1] <= sigma[1]
@pytest.mark.parametrize("kernel_size", [3, [3, 5], (5, 3)])
@pytest.mark.parametrize("sigma", [2.0, [2.0, 3.0]])
def test__transform(self, kernel_size, sigma, mocker):
transform = transforms.GaussianBlur(kernel_size=kernel_size, sigma=sigma)
if isinstance(kernel_size, (tuple, list)):
assert transform.kernel_size == kernel_size
else:
kernel_size = (kernel_size, kernel_size)
assert transform.kernel_size == kernel_size
if isinstance(sigma, (tuple, list)):
assert transform.sigma == sigma
else:
assert transform.sigma == [sigma, sigma]
fn = mocker.patch("torchvision.transforms.v2.functional.gaussian_blur")
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
params = transform._get_params([inpt])
fn.assert_called_once_with(inpt, kernel_size, **params)
class TestRandomColorOp:
@pytest.mark.parametrize("p", [0.0, 1.0])
@pytest.mark.parametrize(
"transform_cls, func_op_name, kwargs",
[
(transforms.RandomEqualize, "equalize", {}),
(transforms.RandomInvert, "invert", {}),
(transforms.RandomAutocontrast, "autocontrast", {}),
(transforms.RandomPosterize, "posterize", {"bits": 4}),
(transforms.RandomSolarize, "solarize", {"threshold": 0.5}),
(transforms.RandomAdjustSharpness, "adjust_sharpness", {"sharpness_factor": 0.5}),
],
)
def test__transform(self, p, transform_cls, func_op_name, kwargs, mocker):
transform = transform_cls(p=p, **kwargs)
fn = mocker.patch(f"torchvision.transforms.v2.functional.{func_op_name}")
inpt = mocker.MagicMock(spec=datapoints.Image)
_ = transform(inpt)
if p > 0.0:
fn.assert_called_once_with(inpt, **kwargs)
else:
assert fn.call_count == 0
class TestRandomPerspective:
def test_assertions(self):
with pytest.raises(ValueError, match="Argument distortion_scale value should be between 0 and 1"):
transforms.RandomPerspective(distortion_scale=-1.0)
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.RandomPerspective(0.5, fill="abc")
def test__get_params(self, mocker):
dscale = 0.5
transform = transforms.RandomPerspective(dscale)
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
params = transform._get_params([image])
h, w = image.spatial_size
assert "coefficients" in params
assert len(params["coefficients"]) == 8
@pytest.mark.parametrize("distortion_scale", [0.1, 0.7])
def test__transform(self, distortion_scale, mocker):
interpolation = InterpolationMode.BILINEAR
fill = 12
transform = transforms.RandomPerspective(distortion_scale, fill=fill, interpolation=interpolation)
fn = mocker.patch("torchvision.transforms.v2.functional.perspective")
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could also be helpful for users
# Otherwise, we can mock transform._get_params
torch.manual_seed(12)
_ = transform(inpt)
torch.manual_seed(12)
torch.rand(1) # random apply changes random state
params = transform._get_params([inpt])
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, None, None, **params, fill=fill, interpolation=interpolation)
class TestElasticTransform:
def test_assertions(self):
with pytest.raises(TypeError, match="alpha should be float or a sequence of floats"):
transforms.ElasticTransform({})
with pytest.raises(ValueError, match="alpha is a sequence its length should be one of 2"):
transforms.ElasticTransform([1.0, 2.0, 3.0])
with pytest.raises(ValueError, match="alpha should be a sequence of floats"):
transforms.ElasticTransform([1, 2])
with pytest.raises(TypeError, match="sigma should be float or a sequence of floats"):
transforms.ElasticTransform(1.0, {})
with pytest.raises(ValueError, match="sigma is a sequence its length should be one of 2"):
transforms.ElasticTransform(1.0, [1.0, 2.0, 3.0])
with pytest.raises(ValueError, match="sigma should be a sequence of floats"):
transforms.ElasticTransform(1.0, [1, 2])
with pytest.raises(TypeError, match="Got inappropriate fill arg"):
transforms.ElasticTransform(1.0, 2.0, fill="abc")
def test__get_params(self, mocker):
alpha = 2.0
sigma = 3.0
transform = transforms.ElasticTransform(alpha, sigma)
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
params = transform._get_params([image])
h, w = image.spatial_size
displacement = params["displacement"]
assert displacement.shape == (1, h, w, 2)
assert (-alpha / w <= displacement[0, ..., 0]).all() and (displacement[0, ..., 0] <= alpha / w).all()
assert (-alpha / h <= displacement[0, ..., 1]).all() and (displacement[0, ..., 1] <= alpha / h).all()
@pytest.mark.parametrize("alpha", [5.0, [5.0, 10.0]])
@pytest.mark.parametrize("sigma", [2.0, [2.0, 5.0]])
def test__transform(self, alpha, sigma, mocker):
interpolation = InterpolationMode.BILINEAR
fill = 12
transform = transforms.ElasticTransform(alpha, sigma=sigma, fill=fill, interpolation=interpolation)
if isinstance(alpha, float):
assert transform.alpha == [alpha, alpha]
else:
assert transform.alpha == alpha
if isinstance(sigma, float):
assert transform.sigma == [sigma, sigma]
else:
assert transform.sigma == sigma
fn = mocker.patch("torchvision.transforms.v2.functional.elastic")
inpt = mocker.MagicMock(spec=datapoints.Image)
inpt.num_channels = 3
inpt.spatial_size = (24, 32)
# Let's mock transform._get_params to control the output:
transform._get_params = mocker.MagicMock()
_ = transform(inpt)
params = transform._get_params([inpt])
fill = transforms._utils._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)
class TestRandomErasing:
def test_assertions(self, mocker):
with pytest.raises(TypeError, match="Argument value should be either a number or str or a sequence"):
transforms.RandomErasing(value={})
with pytest.raises(ValueError, match="If value is str, it should be 'random'"):
transforms.RandomErasing(value="abc")
with pytest.raises(TypeError, match="Scale should be a sequence"):
transforms.RandomErasing(scale=123)
with pytest.raises(TypeError, match="Ratio should be a sequence"):
transforms.RandomErasing(ratio=123)
with pytest.raises(ValueError, match="Scale should be between 0 and 1"):
transforms.RandomErasing(scale=[-1, 2])
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
transform = transforms.RandomErasing(value=[1, 2, 3, 4])
with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"):
transform._get_params([image])
@pytest.mark.parametrize("value", [5.0, [1, 2, 3], "random"])
def test__get_params(self, value, mocker):
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
transform = transforms.RandomErasing(value=value)
params = transform._get_params([image])
v = params["v"]
h, w = params["h"], params["w"]
i, j = params["i"], params["j"]
assert isinstance(v, torch.Tensor)
if value == "random":
assert v.shape == (image.num_channels, h, w)
elif isinstance(value, (int, float)):
assert v.shape == (1, 1, 1)
elif isinstance(value, (list, tuple)):
assert v.shape == (image.num_channels, 1, 1)
assert 0 <= i <= image.spatial_size[0] - h
assert 0 <= j <= image.spatial_size[1] - w
@pytest.mark.parametrize("p", [0, 1])
def test__transform(self, mocker, p):
transform = transforms.RandomErasing(p=p)
transform._transformed_types = (mocker.MagicMock,)
i_sentinel = mocker.MagicMock()
j_sentinel = mocker.MagicMock()
h_sentinel = mocker.MagicMock()
w_sentinel = mocker.MagicMock()
v_sentinel = mocker.MagicMock()
mocker.patch(
"torchvision.transforms.v2._augment.RandomErasing._get_params",
return_value=dict(i=i_sentinel, j=j_sentinel, h=h_sentinel, w=w_sentinel, v=v_sentinel),
)
inpt_sentinel = mocker.MagicMock()
mock = mocker.patch("torchvision.transforms.v2._augment.F.erase")
output = transform(inpt_sentinel)
if p:
mock.assert_called_once_with(
inpt_sentinel,
i=i_sentinel,
j=j_sentinel,
h=h_sentinel,
w=w_sentinel,
v=v_sentinel,
inplace=transform.inplace,
)
else:
mock.assert_not_called()
assert output is inpt_sentinel
class TestTransform:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test_check_transformed_types(self, inpt_type, mocker):
# This test ensures that we correctly handle which types to transform and which to bypass
t = transforms.Transform()
inpt = mocker.MagicMock(spec=inpt_type)
if inpt_type in (np.ndarray, str, int):
output = t(inpt)
assert output is inpt
else:
with pytest.raises(NotImplementedError):
t(inpt)
class TestToImageTensor:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test__transform(self, inpt_type, mocker):
fn = mocker.patch(
"torchvision.transforms.v2.functional.to_image_tensor",
return_value=torch.rand(1, 3, 8, 8),
)
inpt = mocker.MagicMock(spec=inpt_type)
transform = transforms.ToImageTensor()
transform(inpt)
if inpt_type in (datapoints.BoundingBox, datapoints.Image, str, int):
assert fn.call_count == 0
else:
fn.assert_called_once_with(inpt)
class TestToImagePIL:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test__transform(self, inpt_type, mocker):
fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil")
inpt = mocker.MagicMock(spec=inpt_type)
transform = transforms.ToImagePIL()
transform(inpt)
if inpt_type in (datapoints.BoundingBox, PIL.Image.Image, str, int):
assert fn.call_count == 0
else:
fn.assert_called_once_with(inpt, mode=transform.mode)
class TestToPILImage:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test__transform(self, inpt_type, mocker):
fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil")
inpt = mocker.MagicMock(spec=inpt_type)
transform = transforms.ToPILImage()
transform(inpt)
if inpt_type in (PIL.Image.Image, datapoints.BoundingBox, str, int):
assert fn.call_count == 0
else:
fn.assert_called_once_with(inpt, mode=transform.mode)
class TestToTensor:
@pytest.mark.parametrize(
"inpt_type",
[torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int],
)
def test__transform(self, inpt_type, mocker):
fn = mocker.patch("torchvision.transforms.functional.to_tensor")
inpt = mocker.MagicMock(spec=inpt_type)
with pytest.warns(UserWarning, match="deprecated and will be removed"):
transform = transforms.ToTensor()
transform(inpt)
if inpt_type in (datapoints.Image, torch.Tensor, datapoints.BoundingBox, str, int):
assert fn.call_count == 0
else:
fn.assert_called_once_with(inpt)
class TestContainers:
@pytest.mark.parametrize("transform_cls", [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder])
def test_assertions(self, transform_cls):
with pytest.raises(TypeError, match="Argument transforms should be a sequence of callables"):
transform_cls(transforms.RandomCrop(28))
@pytest.mark.parametrize("transform_cls", [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder])
@pytest.mark.parametrize(
"trfms",
[
[transforms.Pad(2), transforms.RandomCrop(28)],
[lambda x: 2.0 * x, transforms.Pad(2), transforms.RandomCrop(28)],
[transforms.Pad(2), lambda x: 2.0 * x, transforms.RandomCrop(28)],
],
)
def test_ctor(self, transform_cls, trfms):
c = transform_cls(trfms)
inpt = torch.rand(1, 3, 32, 32)
output = c(inpt)
assert isinstance(output, torch.Tensor)
assert output.ndim == 4
class TestRandomChoice:
def test_assertions(self):
with pytest.warns(UserWarning, match="Argument p is deprecated and will be removed"):
transforms.RandomChoice([transforms.Pad(2), transforms.RandomCrop(28)], p=[1, 2])
with pytest.raises(ValueError, match="The number of probabilities doesn't match the number of transforms"):
transforms.RandomChoice([transforms.Pad(2), transforms.RandomCrop(28)], probabilities=[1])
class TestRandomIoUCrop:
@pytest.mark.parametrize("device", cpu_and_gpu())
@pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]])
def test__get_params(self, device, options, mocker):
image = mocker.MagicMock(spec=datapoints.Image)
image.num_channels = 3
image.spatial_size = (24, 32)
bboxes = datapoints.BoundingBox(
torch.tensor([[1, 1, 10, 10], [20, 20, 23, 23], [1, 20, 10, 23], [20, 1, 23, 10]]),
format="XYXY",
spatial_size=image.spatial_size,
device=device,
)
sample = [image, bboxes]
transform = transforms.RandomIoUCrop(sampler_options=options)
n_samples = 5
for _ in range(n_samples):
params = transform._get_params(sample)
if options == [2.0]:
assert len(params) == 0
return
assert len(params["is_within_crop_area"]) > 0
assert params["is_within_crop_area"].dtype == torch.bool
orig_h = image.spatial_size[0]
orig_w = image.spatial_size[1]
assert int(transform.min_scale * orig_h) <= params["height"] <= int(transform.max_scale * orig_h)
assert int(transform.min_scale * orig_w) <= params["width"] <= int(transform.max_scale * orig_w)
left, top = params["left"], params["top"]
new_h, new_w = params["height"], params["width"]
ious = box_iou(
bboxes,
torch.tensor([[left, top, left + new_w, top + new_h]], dtype=bboxes.dtype, device=bboxes.device),
)
assert ious.max() >= options[0] or ious.max() >= options[1], f"{ious} vs {options}"
def test__transform_empty_params(self, mocker):
transform = transforms.RandomIoUCrop(sampler_options=[2.0])
image = datapoints.Image(torch.rand(1, 3, 4, 4))
bboxes = datapoints.BoundingBox(torch.tensor([[1, 1, 2, 2]]), format="XYXY", spatial_size=(4, 4))
label = torch.tensor([1])
sample = [image, bboxes, label]
# Let's mock transform._get_params to control the output:
transform._get_params = mocker.MagicMock(return_value={})
output = transform(sample)
torch.testing.assert_close(output, sample)
def test_forward_assertion(self):
transform = transforms.RandomIoUCrop()
with pytest.raises(
TypeError,
match="requires input sample to contain tensor or PIL images and bounding boxes",
):
transform(torch.tensor(0))
def test__transform(self, mocker):
transform = transforms.RandomIoUCrop()
image = datapoints.Image(torch.rand(3, 32, 24))
bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), extra_dims=(6,))
masks = make_detection_mask((32, 24), num_objects=6)
sample = [image, bboxes, masks]
fn = mocker.patch("torchvision.transforms.v2.functional.crop", side_effect=lambda x, **params: x)
is_within_crop_area = torch.tensor([0, 1, 0, 1, 0, 1], dtype=torch.bool)
params = dict(top=1, left=2, height=12, width=12, is_within_crop_area=is_within_crop_area)
transform._get_params = mocker.MagicMock(return_value=params)
output = transform(sample)
assert fn.call_count == 3
expected_calls = [
mocker.call(image, top=params["top"], left=params["left"], height=params["height"], width=params["width"]),
mocker.call(bboxes, top=params["top"], left=params["left"], height=params["height"], width=params["width"]),
mocker.call(masks, top=params["top"], left=params["left"], height=params["height"], width=params["width"]),
]
fn.assert_has_calls(expected_calls)
# boxes that fall outside the crop area are zeroed out rather than dropped:
output_bboxes = output[1]
assert isinstance(output_bboxes, datapoints.BoundingBox)
assert (output_bboxes[~is_within_crop_area] == 0).all()
output_masks = output[2]
assert isinstance(output_masks, datapoints.Mask)
class TestScaleJitter:
def test__get_params(self, mocker):
spatial_size = (24, 32)
target_size = (16, 12)
scale_range = (0.5, 1.5)
transform = transforms.ScaleJitter(target_size=target_size, scale_range=scale_range)
sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size)
n_samples = 5
for _ in range(n_samples):
params = transform._get_params([sample])
assert "size" in params
size = params["size"]
assert isinstance(size, tuple) and len(size) == 2
height, width = size
r_min = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[0]
r_max = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[1]
assert int(spatial_size[0] * r_min) <= height <= int(spatial_size[0] * r_max)
assert int(spatial_size[1] * r_min) <= width <= int(spatial_size[1] * r_max)
def test__transform(self, mocker):
interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode)
antialias_sentinel = mocker.MagicMock()
transform = transforms.ScaleJitter(
target_size=(16, 12), interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
transform._transformed_types = (mocker.MagicMock,)
size_sentinel = mocker.MagicMock()
mocker.patch(
"torchvision.transforms.v2._geometry.ScaleJitter._get_params", return_value=dict(size=size_sentinel)
)
inpt_sentinel = mocker.MagicMock()
mock = mocker.patch("torchvision.transforms.v2._geometry.F.resize")
transform(inpt_sentinel)
mock.assert_called_once_with(
inpt_sentinel, size=size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
class TestRandomShortestSize:
@pytest.mark.parametrize("min_size,max_size", [([5, 9], 20), ([5, 9], None)])
def test__get_params(self, min_size, max_size, mocker):
spatial_size = (3, 10)
transform = transforms.RandomShortestSize(min_size=min_size, max_size=max_size)
sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size)
params = transform._get_params([sample])
assert "size" in params
size = params["size"]
assert isinstance(size, tuple) and len(size) == 2
longer = max(size)
shorter = min(size)
if max_size is not None:
assert longer <= max_size
assert shorter <= max_size
else:
assert shorter in min_size
def test__transform(self, mocker):
interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode)
antialias_sentinel = mocker.MagicMock()
transform = transforms.RandomShortestSize(
min_size=[3, 5, 7], max_size=12, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
transform._transformed_types = (mocker.MagicMock,)
size_sentinel = mocker.MagicMock()
mocker.patch(
"torchvision.transforms.v2._geometry.RandomShortestSize._get_params",
return_value=dict(size=size_sentinel),
)
inpt_sentinel = mocker.MagicMock()
mock = mocker.patch("torchvision.transforms.v2._geometry.F.resize")
transform(inpt_sentinel)
mock.assert_called_once_with(
inpt_sentinel, size=size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
class TestLinearTransformation:
def test_assertions(self):
with pytest.raises(ValueError, match="transformation_matrix should be square"):
transforms.LinearTransformation(torch.rand(2, 3), torch.rand(5))
with pytest.raises(ValueError, match="mean_vector should have the same length"):
transforms.LinearTransformation(torch.rand(3, 3), torch.rand(5))
@pytest.mark.parametrize(
"inpt",
[
122 * torch.ones(1, 3, 8, 8),
122.0 * torch.ones(1, 3, 8, 8),
datapoints.Image(122 * torch.ones(1, 3, 8, 8)),
PIL.Image.new("RGB", (8, 8), (122, 122, 122)),
],
)
def test__transform(self, inpt):
v = 121 * torch.ones(3 * 8 * 8)
m = torch.ones(3 * 8 * 8, 3 * 8 * 8)
transform = transforms.LinearTransformation(m, v)
if isinstance(inpt, PIL.Image.Image):
with pytest.raises(TypeError, match="LinearTransformation does not work on PIL Images"):
transform(inpt)
else:
output = transform(inpt)
assert isinstance(output, torch.Tensor)
assert output.unique() == 3 * 8 * 8
assert output.dtype == inpt.dtype
class TestRandomResize:
def test__get_params(self):
min_size = 3
max_size = 6
transform = transforms.RandomResize(min_size=min_size, max_size=max_size)
for _ in range(10):
params = transform._get_params([])
assert isinstance(params["size"], list) and len(params["size"]) == 1
size = params["size"][0]
assert min_size <= size < max_size
def test__transform(self, mocker):
interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode)
antialias_sentinel = mocker.MagicMock()
transform = transforms.RandomResize(
min_size=-1, max_size=-1, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
transform._transformed_types = (mocker.MagicMock,)
size_sentinel = mocker.MagicMock()
mocker.patch(
"torchvision.transforms.v2._geometry.RandomResize._get_params",
return_value=dict(size=size_sentinel),
)
inpt_sentinel = mocker.MagicMock()
mock_resize = mocker.patch("torchvision.transforms.v2._geometry.F.resize")
transform(inpt_sentinel)
mock_resize.assert_called_with(
inpt_sentinel, size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel
)
class TestToDtype:
@pytest.mark.parametrize(
("dtype", "expected_dtypes"),
[
(
torch.float64,
{
datapoints.Video: torch.float64,
datapoints.Image: torch.float64,
datapoints.BoundingBox: torch.float64,
},
),
(
{datapoints.Video: torch.int32, datapoints.Image: torch.float32, datapoints.BoundingBox: torch.float64},
{datapoints.Video: torch.int32, datapoints.Image: torch.float32, datapoints.BoundingBox: torch.float64},
),
],
)
def test_call(self, dtype, expected_dtypes):
sample = dict(
video=make_video(dtype=torch.int64),
image=make_image(dtype=torch.uint8),
bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, dtype=torch.float32),
str="str",
int=0,
)
transform = transforms.ToDtype(dtype)
transformed_sample = transform(sample)
for key, value in sample.items():
value_type = type(value)
transformed_value = transformed_sample[key]
# make sure the transformation retains the type
assert isinstance(transformed_value, value_type)
if isinstance(value, torch.Tensor):
assert transformed_value.dtype is expected_dtypes[value_type]
else:
assert transformed_value is value
@pytest.mark.filterwarnings("error")
def test_plain_tensor_call(self):
tensor = torch.empty((), dtype=torch.float32)
transform = transforms.ToDtype({torch.Tensor: torch.float64})
assert transform(tensor).dtype is torch.float64
@pytest.mark.parametrize("other_type", [datapoints.Image, datapoints.Video])
def test_plain_tensor_warning(self, other_type):
with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")):
transforms.ToDtype(dtype={torch.Tensor: torch.float32, other_type: torch.float64})
class TestUniformTemporalSubsample:
@pytest.mark.parametrize(
"inpt",
[
torch.zeros(10, 3, 8, 8),
torch.zeros(1, 10, 3, 8, 8),
datapoints.Video(torch.zeros(1, 10, 3, 8, 8)),
],
)
def test__transform(self, inpt):
num_samples = 5
transform = transforms.UniformTemporalSubsample(num_samples)
output = transform(inpt)
assert type(output) is type(inpt)
assert output.shape[-4] == num_samples
assert output.dtype == inpt.dtype
# TODO: remove this test in 0.17 when the default of antialias changes to True
def test_antialias_warning():
pil_img = PIL.Image.new("RGB", size=(10, 10), color=127)
tensor_img = torch.randint(0, 256, size=(3, 10, 10), dtype=torch.uint8)
tensor_video = torch.randint(0, 256, size=(2, 3, 10, 10), dtype=torch.uint8)
match = "The default value of the antialias parameter"
with pytest.warns(UserWarning, match=match):
transforms.Resize((20, 20))(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.RandomResizedCrop((20, 20))(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.ScaleJitter((20, 20))(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.RandomShortestSize((20, 20))(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.RandomResize(10, 20)(tensor_img)
with pytest.warns(UserWarning, match=match):
transforms.functional.resize(tensor_img, (20, 20))
with pytest.warns(UserWarning, match=match):
transforms.functional.resize_image_tensor(tensor_img, (20, 20))
with pytest.warns(UserWarning, match=match):
transforms.functional.resize(tensor_video, (20, 20))
with pytest.warns(UserWarning, match=match):
transforms.functional.resize_video(tensor_video, (20, 20))
with pytest.warns(UserWarning, match=match):
datapoints.Image(tensor_img).resize((20, 20))
with pytest.warns(UserWarning, match=match):
datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20))
with pytest.warns(UserWarning, match=match):
datapoints.Video(tensor_video).resize((20, 20))
with pytest.warns(UserWarning, match=match):
datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20))
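# PIL inputs and calls that pass antialias=True explicitly must not warn: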
with warnings.catch_warnings():
warnings.simplefilter("error")
transforms.Resize((20, 20))(pil_img)
transforms.RandomResizedCrop((20, 20))(pil_img)
transforms.ScaleJitter((20, 20))(pil_img)
transforms.RandomShortestSize((20, 20))(pil_img)
transforms.RandomResize(10, 20)(pil_img)
transforms.functional.resize(pil_img, (20, 20))
transforms.Resize((20, 20), antialias=True)(tensor_img)
transforms.RandomResizedCrop((20, 20), antialias=True)(tensor_img)
transforms.ScaleJitter((20, 20), antialias=True)(tensor_img)
transforms.RandomShortestSize((20, 20), antialias=True)(tensor_img)
transforms.RandomResize(10, 20, antialias=True)(tensor_img)
transforms.functional.resize(tensor_img, (20, 20), antialias=True)
transforms.functional.resize_image_tensor(tensor_img, (20, 20), antialias=True)
transforms.functional.resize(tensor_video, (20, 20), antialias=True)
transforms.functional.resize_video(tensor_video, (20, 20), antialias=True)
datapoints.Image(tensor_img).resize((20, 20), antialias=True)
datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
datapoints.Video(tensor_video).resize((20, 20), antialias=True)
datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, datapoints.Image))
@pytest.mark.parametrize("label_type", (torch.Tensor, int))
@pytest.mark.parametrize("dataset_return_type", (dict, tuple))
@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImageTensor))
def test_classif_preset(image_type, label_type, dataset_return_type, to_tensor):
image = datapoints.Image(torch.randint(0, 256, size=(1, 3, 250, 250), dtype=torch.uint8))
if image_type is PIL.Image:
image = to_pil_image(image[0])
elif image_type is torch.Tensor:
image = image.as_subclass(torch.Tensor)
assert is_simple_tensor(image)
label = 1 if label_type is int else torch.tensor([1])
if dataset_return_type is dict:
sample = {
"image": image,
"label": label,
}
else:
sample = image, label
t = transforms.Compose(
[
transforms.RandomResizedCrop((224, 224)),
transforms.RandomHorizontalFlip(p=1),
transforms.RandAugment(),
transforms.TrivialAugmentWide(),
transforms.AugMix(),
transforms.AutoAugment(),
to_tensor(),
# TODO: ConvertImageDtype is a pass-through on PIL images, is that
# intended? This results in a failure if we convert to tensor after
# it, because the image would still be uint8 which makes Normalize
# fail.
transforms.ConvertImageDtype(torch.float),
transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1]),
transforms.RandomErasing(p=1),
]
)
out = t(sample)
assert type(out) == type(sample)
if dataset_return_type is tuple:
out_image, out_label = out
else:
assert out.keys() == sample.keys()
out_image, out_label = out.values()
assert out_image.shape[-2:] == (224, 224)
assert out_label == label
@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, datapoints.Image))
@pytest.mark.parametrize("data_augmentation", ("hflip", "lsj", "multiscale", "ssd", "ssdlite"))
@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImageTensor))
@pytest.mark.parametrize("sanitize", (True, False))
def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize):
torch.manual_seed(0)
if data_augmentation == "hflip":
t = [
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
elif data_augmentation == "lsj":
t = [
transforms.ScaleJitter(target_size=(1024, 1024), antialias=True),
# Note: replaced FixedSizeCrop with RandomCrop, because we're
# leaving FixedSizeCrop in prototype for now, and it expects Label
# classes which we won't release yet.
# transforms.FixedSizeCrop(
# size=(1024, 1024), fill=defaultdict(lambda: (123.0, 117.0, 104.0), {datapoints.Mask: 0})
# ),
transforms.RandomCrop((1024, 1024), pad_if_needed=True),
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
elif data_augmentation == "multiscale":
t = [
transforms.RandomShortestSize(
min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333, antialias=True
),
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
elif data_augmentation == "ssd":
t = [
transforms.RandomPhotometricDistort(p=1),
transforms.RandomZoomOut(fill=defaultdict(lambda: (123.0, 117.0, 104.0), {datapoints.Mask: 0})),
transforms.RandomIoUCrop(),
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
elif data_augmentation == "ssdlite":
t = [
transforms.RandomIoUCrop(),
transforms.RandomHorizontalFlip(p=1),
to_tensor(),
transforms.ConvertImageDtype(torch.float),
]
if sanitize:
t += [transforms.SanitizeBoundingBoxes()]
t = transforms.Compose(t)
num_boxes = 5
H = W = 250
image = datapoints.Image(torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8))
if image_type is PIL.Image:
image = to_pil_image(image[0])
elif image_type is torch.Tensor:
image = image.as_subclass(torch.Tensor)
assert is_simple_tensor(image)
label = torch.randint(0, 10, size=(num_boxes,))
boxes = torch.randint(0, min(H, W) // 2, size=(num_boxes, 4))
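# Interpret the last two values as width/height offsets so that x2 >= x1 and y2 >= y1, i.e. valid XYXY boxes.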
boxes[:, 2:] += boxes[:, :2]
boxes = boxes.clamp(min=0, max=min(H, W))
boxes = datapoints.BoundingBox(boxes, format="XYXY", spatial_size=(H, W))
masks = datapoints.Mask(torch.randint(0, 2, size=(num_boxes, H, W), dtype=torch.uint8))
sample = {
"image": image,
"label": label,
"boxes": boxes,
"masks": masks,
}
out = t(sample)
if to_tensor is transforms.ToTensor and image_type is not datapoints.Image:
assert is_simple_tensor(out["image"])
else:
assert isinstance(out["image"], datapoints.Image)
assert isinstance(out["label"], type(sample["label"]))
num_boxes_expected = {
# ssd and ssdlite contain RandomIoUCrop which may "remove" some bbox. It
# doesn't remove them strictly speaking, it just marks some boxes as
# degenerate and those boxes will be later removed by
# SanitizeBoundingBoxes(), which we add to the pipelines if the sanitize
# param is True.
# Note that the values below are probably specific to the random seed
# set above (which is fine).
(True, "ssd"): 4,
(True, "ssdlite"): 4,
}.get((sanitize, data_augmentation), num_boxes)
assert out["boxes"].shape[0] == out["masks"].shape[0] == out["label"].shape[0] == num_boxes_expected
@pytest.mark.parametrize("min_size", (1, 10))
@pytest.mark.parametrize(
"labels_getter", ("default", "labels", lambda inputs: inputs["labels"], None, lambda inputs: None)
)
def test_sanitize_bounding_boxes(min_size, labels_getter):
H, W = 256, 128
boxes_and_validity = [
([0, 1, 10, 1], False), # Y1 == Y2
([0, 1, 0, 20], False), # X1 == X2
([0, 0, min_size - 1, 10], False), # W < min_size
([0, 0, 10, min_size - 1], False), # H < min_size
([0, 0, 10, H + 1], False), # Y2 > H
([0, 0, W + 1, 10], False), # X2 > W
([-1, 1, 10, 20], False), # any < 0
([0, 0, -1, 20], False), # any < 0
([0, 0, -10, -1], False), # any < 0
([0, 0, min_size, 10], True), # W == min_size
([0, 0, 10, min_size], True), # H == min_size
([0, 0, W, H], True), # TODO: Is that actually OK?? Should it be -1?
([1, 1, 30, 20], True),
([0, 0, 10, 10], True),
([1, 1, 30, 20], True),
]
random.shuffle(boxes_and_validity) # For test robustness: mix order of wrong and correct cases
boxes, is_valid_mask = zip(*boxes_and_validity)
valid_indices = [i for (i, is_valid) in enumerate(is_valid_mask) if is_valid]
boxes = torch.tensor(boxes)
labels = torch.arange(boxes.shape[0])
boxes = datapoints.BoundingBox(
boxes,
format=datapoints.BoundingBoxFormat.XYXY,
spatial_size=(H, W),
)
masks = datapoints.Mask(torch.randint(0, 2, size=(boxes.shape[0], H, W)))
sample = {
"image": torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8),
"labels": labels,
"boxes": boxes,
"whatever": torch.rand(10),
"None": None,
"masks": masks,
}
out = transforms.SanitizeBoundingBoxes(min_size=min_size, labels_getter=labels_getter)(sample)
assert out["image"] is sample["image"]
assert out["whatever"] is sample["whatever"]
if labels_getter is None or (callable(labels_getter) and labels_getter({"labels": "blah"}) is None):
assert out["labels"] is sample["labels"]
else:
assert isinstance(out["labels"], torch.Tensor)
assert out["boxes"].shape[0] == out["labels"].shape[0] == out["masks"].shape[0]
# This works because we conveniently set labels to arange(num_boxes)
assert out["labels"].tolist() == valid_indices
@pytest.mark.parametrize("key", ("labels", "LABELS", "LaBeL", "SOME_WEIRD_KEY_THAT_HAS_LABeL_IN_IT"))
def test_sanitize_bounding_boxes_default_heuristic(key):
labels = torch.arange(10)
d = {key: labels}
assert transforms.SanitizeBoundingBoxes._find_labels_default_heuristic(d) is labels
if key.lower() != "labels":
# If "labels" is in the dict (case-insensitive),
# it takes precedence over other keys which would otherwise be a match
d = {key: "something_else", "labels": labels}
assert transforms.SanitizeBoundingBoxes._find_labels_default_heuristic(d) is labels
def test_sanitize_bounding_boxes_errors():
good_bbox = datapoints.BoundingBox(
[[0, 0, 10, 10]],
format=datapoints.BoundingBoxFormat.XYXY,
spatial_size=(20, 20),
)
with pytest.raises(ValueError, match="min_size must be >= 1"):
transforms.SanitizeBoundingBoxes(min_size=0)
with pytest.raises(ValueError, match="labels_getter should either be a str"):
transforms.SanitizeBoundingBoxes(labels_getter=12)
with pytest.raises(ValueError, match="Could not infer where the labels are"):
bad_labels_key = {"bbox": good_bbox, "BAD_KEY": torch.arange(good_bbox.shape[0])}
transforms.SanitizeBoundingBoxes()(bad_labels_key)
with pytest.raises(ValueError, match="If labels_getter is a str or 'default'"):
not_a_dict = (good_bbox, torch.arange(good_bbox.shape[0]))
transforms.SanitizeBoundingBoxes()(not_a_dict)
with pytest.raises(ValueError, match="must be a tensor"):
not_a_tensor = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0]).tolist()}
transforms.SanitizeBoundingBoxes()(not_a_tensor)
with pytest.raises(ValueError, match="Number of boxes"):
different_sizes = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0] + 3)}
transforms.SanitizeBoundingBoxes()(different_sizes)
with pytest.raises(ValueError, match="boxes must be of shape"):
bad_bbox = datapoints.BoundingBox( # batch with 2 elements
[
[[0, 0, 10, 10]],
[[0, 0, 10, 10]],
],
format=datapoints.BoundingBoxFormat.XYXY,
spatial_size=(20, 20),
)
different_sizes = {"bbox": bad_bbox, "labels": torch.arange(bad_bbox.shape[0])}
transforms.SanitizeBoundingBoxes()(different_sizes)
......@@ -12,9 +12,8 @@ import PIL.Image
import pytest
import torch
import torchvision.prototype.transforms as prototype_transforms
import torchvision.transforms.v2 as v2_transforms
from prototype_common_utils import (
from common_utils import (
ArgsKwargs,
assert_close,
assert_equal,
......@@ -22,7 +21,6 @@ from prototype_common_utils import (
make_detection_mask,
make_image,
make_images,
make_label,
make_segmentation_mask,
)
from torch import nn
......@@ -1056,6 +1054,9 @@ class TestRefDetTransforms:
size = (600, 800)
num_objects = 22
def make_label(extra_dims, categories):
return torch.randint(categories, extra_dims, dtype=torch.int64)
pil_image = to_image_pil(make_image(size=size, color_space="RGB"))
target = {
"boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
......@@ -1102,11 +1103,6 @@ class TestRefDetTransforms:
),
(det_transforms.RandomZoomOut(), v2_transforms.RandomZoomOut(), {"with_mask": False}),
(det_transforms.ScaleJitter((1024, 1024)), v2_transforms.ScaleJitter((1024, 1024)), {}),
(
det_transforms.FixedSizeCrop((1024, 1024), fill=0),
prototype_transforms.FixedSizeCrop((1024, 1024), fill=0),
{},
),
(
det_transforms.RandomShortestSize(
min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333
......
......@@ -11,15 +11,16 @@ import pytest
import torch
from common_utils import cache, cpu_and_gpu, needs_cuda, set_rng_seed
from prototype_common_utils import (
from common_utils import (
assert_close,
cache,
cpu_and_gpu,
DEFAULT_SQUARE_SPATIAL_SIZE,
make_bounding_boxes,
needs_cuda,
parametrized_error_message,
set_rng_seed,
)
from prototype_transforms_dispatcher_infos import DISPATCHER_INFOS
from prototype_transforms_kernel_infos import KERNEL_INFOS
from torch.utils._pytree import tree_map
from torchvision import datapoints
from torchvision.transforms.functional import _get_perspective_coeffs
......@@ -27,6 +28,8 @@ from torchvision.transforms.v2 import functional as F
from torchvision.transforms.v2.functional._geometry import _center_crop_compute_padding
from torchvision.transforms.v2.functional._meta import clamp_bounding_box, convert_format_bounding_box
from torchvision.transforms.v2.utils import is_simple_tensor
from transforms_v2_dispatcher_infos import DISPATCHER_INFOS
from transforms_v2_kernel_infos import KERNEL_INFOS
KERNEL_INFOS_MAP = {info.kernel: info for info in KERNEL_INFOS}
......@@ -635,7 +638,7 @@ class TestConvertFormatBoundingBox:
# TODO: All correctness checks below this line should be ported to be references on a `KernelInfo` in
# `prototype_transforms_kernel_infos.py`
# `transforms_v2_kernel_infos.py`
def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_):
......
......@@ -4,7 +4,7 @@ import pytest
import torch
import torchvision.transforms.v2.utils
from prototype_common_utils import make_bounding_box, make_detection_mask, make_image
from common_utils import make_bounding_box, make_detection_mask, make_image
from torchvision import datapoints
from torchvision.transforms.v2.functional import to_image_pil
......
......@@ -2,9 +2,9 @@ import collections.abc
import pytest
import torchvision.transforms.v2.functional as F
from prototype_common_utils import InfoBase, TestMark
from prototype_transforms_kernel_infos import KERNEL_INFOS, pad_xfail_jit_fill_condition
from common_utils import InfoBase, TestMark
from torchvision import datapoints
from transforms_v2_kernel_infos import KERNEL_INFOS, pad_xfail_jit_fill_condition
__all__ = ["DispatcherInfo", "DISPATCHER_INFOS"]
......@@ -49,7 +49,7 @@ class DispatcherInfo(InfoBase):
if not kernel_info:
raise pytest.UsageError(
f"Can't register {kernel.__name__} for type {datapoint_type} since there is no `KernelInfo` for it. "
f"Please add a `KernelInfo` for it in `prototype_transforms_kernel_infos.py`."
f"Please add a `KernelInfo` for it in `transforms_v2_kernel_infos.py`."
)
kernel_infos[datapoint_type] = kernel_info
self.kernel_infos = kernel_infos
......
......@@ -9,9 +9,9 @@ import pytest
import torch.testing
import torchvision.ops
import torchvision.transforms.v2.functional as F
from datasets_utils import combinations_grid
from prototype_common_utils import (
from common_utils import (
ArgsKwargs,
combinations_grid,
get_num_channels,
ImageLoader,
InfoBase,
......