common_utils.py 31.6 KB
Newer Older
1
import collections.abc
2
import contextlib
3
4
import dataclasses
import enum
5
import functools
6
import itertools
7
import os
8
import pathlib
9
import random
10
import re
11
import shutil
12
import sys
13
import tempfile
14
import warnings
15
from collections import defaultdict
16
from subprocess import CalledProcessError, check_output, STDOUT
17
from typing import Callable, Sequence, Tuple, Union
18
19

import numpy as np
20
21
22

import PIL.Image
import pytest
eellison's avatar
eellison committed
23
import torch
24
import torch.testing
25
from PIL import Image
26

27
28
29
from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
from torchvision import datapoints, io
from torchvision.transforms._functional_tensor import _max_value as get_max_value
30
from torchvision.transforms.v2.functional import to_dtype_image_tensor, to_image_pil, to_image_tensor
31

32

33
# True when one of the listed CI environment variables is set to the string "true".
IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
# True when the INSIDE_RE_WORKER environment variable is set to any value.
IN_RE_WORKER = os.environ.get("INSIDE_RE_WORKER") is not None
# True when IN_FBCODE_TORCHVISION is exactly "1".
IN_FBCODE = os.environ.get("IN_FBCODE_TORCHVISION") == "1"
# Messages for CUDA-related skips; presumably consumed by the test configuration -- confirm against the callers.
CUDA_NOT_AVAILABLE_MSG = "CUDA device not available"
OSS_CI_GPU_NO_CUDA_MSG = "We're in an OSS GPU machine, and this test doesn't need cuda."
38

39
40
41
42
43
44
45
46
47
48
49

@contextlib.contextmanager
def get_tmp_dir(src=None, **kwargs):
    """Yield the path of a temporary directory and remove it again on exit.

    Args:
        src: Optional directory whose content is copied into the temporary directory before it is yielded.
        **kwargs: Forwarded to :func:`tempfile.mkdtemp`.
    """
    path = tempfile.mkdtemp(**kwargs)
    if src is not None:
        # copytree is asked to create `path` itself here, so the empty directory is removed first.
        os.rmdir(path)
        shutil.copytree(src, path)
    try:
        yield path
    finally:
        shutil.rmtree(path)
eellison's avatar
eellison committed
50
51


52
53
54
55
56
def set_rng_seed(seed):
    """Seed PyTorch (``torch.manual_seed``) and Python's ``random`` module with ``seed``."""
    for seed_fn in (torch.manual_seed, random.seed):
        seed_fn(seed)


57
class MapNestedTensorObjectImpl:
    """Callable that applies ``tensor_map_fn`` to every tensor found in a nested structure.

    Dictionaries (keys and values), lists and tuples are traversed recursively. Any other non-tensor object is
    returned unchanged. Lists stay lists and tuples become plain tuples.
    """

    def __init__(self, tensor_map_fn):
        self.tensor_map_fn = tensor_map_fn

    def __call__(self, object):
        if isinstance(object, torch.Tensor):
            return self.tensor_map_fn(object)

        if isinstance(object, dict):
            return {self(key): self(value) for key, value in object.items()}

        if isinstance(object, (list, tuple)):
            mapped = [self(item) for item in object]
            return tuple(mapped) if isinstance(object, tuple) else mapped

        return object


def map_nested_tensor_object(object, tensor_map_fn):
    """Apply ``tensor_map_fn`` to all tensors nested inside ``object`` via :class:`MapNestedTensorObjectImpl`."""
    return MapNestedTensorObjectImpl(tensor_map_fn)(object)


86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
def is_iterable(obj):
    """Return ``True`` if ``iter(obj)`` succeeds and ``False`` if it raises :class:`TypeError`."""
    try:
        iter(obj)
    except TypeError:
        return False
    else:
        return True


@contextlib.contextmanager
def freeze_rng_state():
    """Restore PyTorch's RNG state on exit, so random draws inside the ``with`` block leave no trace.

    The CPU state is always saved and restored. The state of the current CUDA device is saved and restored as well
    if CUDA is available. The state is restored even if the body raises.
    """
    rng_state = torch.get_rng_state()
    has_cuda = torch.cuda.is_available()
    if has_cuda:
        cuda_rng_state = torch.cuda.get_rng_state()
    try:
        yield
    finally:
        # Without the `finally`, an exception in the body would leak the advanced RNG state into later code.
        if has_cuda:
            torch.cuda.set_rng_state(cuda_rng_state)
        torch.set_rng_state(rng_state)
103
104


105
def cycle_over(objs):
    """Yield every ordered pair ``(obj1, obj2)`` of elements of ``objs`` taken from two different positions.

    ``objs`` has to support slicing and concatenation with ``+``, e.g. a list or a tuple.
    """
    for idx, obj1 in enumerate(objs):
        for obj2 in objs[:idx] + objs[idx + 1 :]:
            yield obj1, obj2
109
110
111


def int_dtypes():
    """Return the integer dtypes used by the tests: uint8, int8, int16, int32 and int64."""
    return (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64)
113
114
115


def float_dtypes():
    """Return the floating point dtypes used by the tests: float32 and float64."""
    return (torch.float32, torch.float64)
117
118
119
120
121
122
123
124


@contextlib.contextmanager
def disable_console_output():
    """Redirect ``sys.stdout`` and ``sys.stderr`` to the null device for the duration of the ``with`` block."""
    with contextlib.ExitStack() as redirects:
        with open(os.devnull, "w") as sink:
            for redirect in (contextlib.redirect_stdout, contextlib.redirect_stderr):
                redirects.enter_context(redirect(sink))
            yield
125
126


127
def cpu_and_cuda():
    """Return the device parametrization ``("cpu", "cuda")`` with the ``"cuda"`` entry marked ``needs_cuda``."""
    import pytest  # noqa

    return ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
131
132
133
134


def needs_cuda(test_func):
    """Decorator that applies the ``needs_cuda`` pytest mark to ``test_func``."""
    import pytest  # noqa

    return pytest.mark.needs_cuda(test_func)
Nicolas Hug's avatar
Nicolas Hug committed
137
138
139
140
141


def _create_data(height=3, width=3, channels=3, device="cpu"):
    """Create a random uint8 image as a ``(channels, height, width)`` tensor together with a PIL image of it.

    The PIL image uses mode ``"L"`` for ``channels == 1`` and ``"RGB"`` otherwise.

    Returns:
        Tuple ``(tensor, pil_img)``. The tensor lives on ``device``.
    """
    # TODO: When all relevant tests are ported to pytest, turn this into a module-level fixture
    tensor = torch.randint(0, 256, (channels, height, width), dtype=torch.uint8, device=device)
    # Channels go last for the conversion to a PIL image.
    data = tensor.permute(1, 2, 0).contiguous().cpu().numpy()
    mode = "RGB"
    if channels == 1:
        mode = "L"
        data = data[..., 0]
    pil_img = Image.fromarray(data, mode=mode)
    return tensor, pil_img


def _create_data_batch(height=3, width=3, channels=3, num_samples=4, device="cpu"):
    """Create a random uint8 tensor of shape ``(num_samples, channels, height, width)`` on ``device``."""
    # TODO: When all relevant tests are ported to pytest, turn this into a module-level fixture
    batch_tensor = torch.randint(0, 256, (num_samples, channels, height, width), dtype=torch.uint8, device=device)
    return batch_tensor


157
158
159
160
161
162
163
164
165
166
167
168
def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
    """Write ``num_videos`` random videos named ``{i}.mp4`` into ``tmpdir`` and return their paths.

    Args:
        tmpdir: Directory the videos are written to.
        num_videos: Number of videos to create.
        sizes: Optional per-video first dimension of the random data (presumably the frame count -- confirm against
            ``io.write_video``). Defaults to ``5 * (i + 1)`` for video ``i``.
        fps: Optional per-video frames per second. Defaults to 5 for every video.

    Returns:
        List of the file paths in creation order.
    """
    names = []
    for i in range(num_videos):
        size = 5 * (i + 1) if sizes is None else sizes[i]
        video_fps = 5 if fps is None else fps[i]
        data = torch.randint(0, 256, (size, 300, 400, 3), dtype=torch.uint8)
        name = os.path.join(tmpdir, f"{i}.mp4")
        names.append(name)
        io.write_video(name, data, fps=video_fps)

    return names


Nicolas Hug's avatar
Nicolas Hug committed
176
def _assert_equal_tensor_to_pil(tensor, pil_image, msg=None):
    """Assert that ``tensor`` equals ``pil_image`` after converting the latter to a channels-first tensor.

    Args:
        tensor: Tensor to check. It is moved to the CPU for the comparison.
        pil_image: Image to compare against. Two-dimensional data gets a trailing channel axis before the transpose.
        msg: Optional failure message. Defaults to a message showing both tensors.
    """
    # FIXME: this is handled automatically by `assert_equal` below. Let's remove this in favor of it
    np_pil_image = np.array(pil_image)
    if np_pil_image.ndim == 2:
        np_pil_image = np_pil_image[:, :, None]
    pil_tensor = torch.as_tensor(np_pil_image.transpose((2, 0, 1)))
    if msg is None:
        msg = f"tensor:\n{tensor} \ndid not equal PIL tensor:\n{pil_tensor}"
    assert_equal(tensor.cpu(), pil_tensor, msg=msg)
Nicolas Hug's avatar
Nicolas Hug committed
185
186


187
188
189
def _assert_approx_equal_tensor_to_pil(
    tensor, pil_image, tol=1e-5, msg=None, agg_method="mean", allowed_percentage_diff=None
):
    """Assert that ``tensor`` and ``pil_image`` are close in terms of an aggregated absolute error.

    Args:
        tensor: Tensor to check.
        pil_image: Image to compare against. It is converted to a channels-first tensor with the dtype and device of
            ``tensor``.
        tol: The aggregated absolute error has to be strictly smaller than this value.
        msg: Unused by this function.
        agg_method: Name of the ``torch`` function that reduces the absolute difference, e.g. ``"mean"`` or ``"max"``.
        allowed_percentage_diff: If given, the fraction of differing elements must not exceed this value.
    """
    # FIXME: this is handled automatically by `assert_close` below. Let's remove this in favor of it
    # TODO: we could just merge this into _assert_equal_tensor_to_pil
    np_pil_image = np.array(pil_image)
    if np_pil_image.ndim == 2:
        np_pil_image = np_pil_image[:, :, None]
    pil_tensor = torch.as_tensor(np_pil_image.transpose((2, 0, 1))).to(tensor)

    if allowed_percentage_diff is not None:
        # Assert that less than a given %age of pixels are different
        assert (tensor != pil_tensor).to(torch.float).mean() <= allowed_percentage_diff

    # error value can be mean absolute error, max abs error
    # Convert to float to avoid underflow when computing absolute difference
    tensor = tensor.to(torch.float)
    pil_tensor = pil_tensor.to(torch.float)
    err = getattr(torch, agg_method)(torch.abs(tensor - pil_tensor)).item()
    assert err < tol, f"{err} vs {tol}"
Nicolas Hug's avatar
Nicolas Hug committed
207
208
209
210
211
212
213


def _test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=1e-8, **fn_kwargs):
    """Check that ``fn`` gives the same result on a batch as on each sample, and optionally when scripted.

    Args:
        batch_tensors: Batched input; samples are taken along the first dimension.
        fn: Function under test, called as ``fn(tensor, **fn_kwargs)``.
        scripted_fn_atol: Absolute tolerance for comparing the eager and the scripted batch result. A negative value
            skips the scripting check.
        **fn_kwargs: Forwarded to ``fn``.
    """
    transformed_batch = fn(batch_tensors, **fn_kwargs)
    for i, img_tensor in enumerate(batch_tensors):
        transformed_img = fn(img_tensor, **fn_kwargs)
        torch.testing.assert_close(transformed_img, transformed_batch[i, ...], rtol=0, atol=1e-6)

    if scripted_fn_atol >= 0:
        scripted_fn = torch.jit.script(fn)
        # scriptable function test
        s_transformed_batch = scripted_fn(batch_tensors, **fn_kwargs)
        torch.testing.assert_close(transformed_batch, s_transformed_batch, rtol=1e-5, atol=scripted_fn_atol)
221
222
223


def cache(fn):
    """Similar to :func:`functools.cache` (Python >= 3.9) or :func:`functools.lru_cache` with infinite cache size,
    but this also caches exceptions.

    All positional and keyword arguments have to be hashable. A call that raised an :class:`Exception` re-raises the
    same exception object on every later call with the same arguments, without calling ``fn`` again.
    """
    sentinel = object()
    out_cache = {}
    exc_tb_cache = {}

    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        # Keyword names are part of the key. Using only the values would make `fn(a=1, b=2)` and `fn(b=1, a=2)` share
        # one cache entry. Sorting makes the key independent of the order the keywords were passed in; since the names
        # are unique, the values are never compared.
        key = (args, tuple(sorted(kwargs.items())))

        out = out_cache.get(key, sentinel)
        if out is not sentinel:
            return out

        exc_tb = exc_tb_cache.get(key, sentinel)
        if exc_tb is not sentinel:
            raise exc_tb[0].with_traceback(exc_tb[1])

        try:
            out = fn(*args, **kwargs)
        except Exception as exc:
            # We need to cache the traceback here as well. Otherwise, each re-raise will add the internal pytest
            # traceback frames anew, but they will only be removed once. Thus, the traceback will be ginormous hiding
            # the actual information in the noise. See https://github.com/pytest-dev/pytest/issues/10363 for details.
            exc_tb_cache[key] = exc, exc.__traceback__
            raise exc

        out_cache[key] = out
        return out

    return wrapper
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296


def combinations_grid(**kwargs):
    """Build the full grid of parameter combinations.

    Every keyword maps to an iterable of candidate values. The result is a list with one dictionary per element of
    the cartesian product of these candidates, keyed by the keyword names.

    Example:
        >>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham"))
        [
            {'foo': 'bar', 'spam': 'eggs'},
            {'foo': 'bar', 'spam': 'ham'},
            {'foo': 'baz', 'spam': 'eggs'},
            {'foo': 'baz', 'spam': 'ham'}
        ]
    """
    names = kwargs.keys()
    grid = []
    for combination in itertools.product(*kwargs.values()):
        grid.append(dict(zip(names, combination)))
    return grid


class ImagePair(TensorLikePair):
    """Comparison pair for images that accepts PIL images and can compare by mean absolute error (MAE).

    If both inputs are PIL images, they are converted with ``to_image_tensor`` before being handed to
    :class:`TensorLikePair`.

    Args:
        actual: Actual input.
        expected: Expected input.
        mae: If ``True``, the values are compared through their mean absolute error, which must not exceed ``atol``.
            Otherwise the regular value comparison of the parent class is used.
        **other_parameters: Forwarded to :class:`TensorLikePair`.
    """

    def __init__(
        self,
        actual,
        expected,
        *,
        mae=False,
        **other_parameters,
    ):
        if all(isinstance(image, PIL.Image.Image) for image in [actual, expected]):
            actual, expected = [to_image_tensor(image) for image in [actual, expected]]

        super().__init__(actual, expected, **other_parameters)
        self.mae = mae

    def compare(self) -> None:
        actual, expected = self.actual, self.expected

        self._compare_attributes(actual, expected)
        actual, expected = self._equalize_attributes(actual, expected)

        if self.mae:
            if actual.dtype is torch.uint8:
                # uint8 cannot hold negative differences, so the subtraction below is done in a signed dtype.
                actual, expected = actual.to(torch.int), expected.to(torch.int)
            mae = float(torch.abs(actual - expected).float().mean())
            if mae > self.atol:
                self._fail(
                    AssertionError,
                    f"The MAE of the images is {mae}, but only {self.atol} is allowed.",
                )
        else:
            super()._compare_values(actual, expected)


def assert_close(
    actual,
    expected,
    *,
    allow_subclasses=True,
    rtol=None,
    atol=None,
    equal_nan=False,
    check_device=True,
    check_dtype=True,
    check_layout=True,
    check_stride=False,
    msg=None,
    **kwargs,
):
    """Superset of :func:`torch.testing.assert_close` with support for PIL vs. tensor image comparison

    Raises the error built from the first mismatch that ``not_close_error_metas`` reports, using ``msg`` as the
    message. Extra ``**kwargs`` are forwarded to ``not_close_error_metas``.
    """
    __tracebackhide__ = True

    # `ImagePair` is listed before the generic `TensorLikePair`.
    supported_pairs = (NonePair, BooleanPair, NumberPair, ImagePair, TensorLikePair)
    comparison_options = dict(
        allow_subclasses=allow_subclasses,
        rtol=rtol,
        atol=atol,
        equal_nan=equal_nan,
        check_device=check_device,
        check_dtype=check_dtype,
        check_layout=check_layout,
        check_stride=check_stride,
    )
    error_metas = not_close_error_metas(
        actual, expected, pair_types=supported_pairs, **comparison_options, **kwargs
    )

    if not error_metas:
        return
    raise error_metas[0].to_error(msg)


# Variant of `assert_close` with both tolerances pinned to zero.
assert_equal = functools.partial(assert_close, rtol=0, atol=0)


def parametrized_error_message(*args, **kwargs):
    """Return a function that appends a description of the given parameters to an error message.

    Tensors with more than 30 elements are summarized by shape, dtype and device, enum members are shown as
    ``ClassName.MEMBER``, and everything else through :func:`repr`. Without any parameters, the returned function
    leaves the message unchanged.
    """

    def to_str(obj):
        if isinstance(obj, torch.Tensor) and obj.numel() > 30:
            return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})"
        elif isinstance(obj, enum.Enum):
            return f"{type(obj).__name__}.{obj.name}"
        else:
            return repr(obj)

    if args or kwargs:
        postfix = "\n".join(
            [
                "",
                "Failure happened for the following parameters:",
                "",
                *[to_str(arg) for arg in args],
                *[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()],
            ]
        )
    else:
        postfix = ""

    def wrapper(msg):
        return msg + postfix

    return wrapper


class ArgsKwargs:
    """Container for the positional and keyword arguments of a call.

    Iterating an instance yields the ``args`` tuple followed by the ``kwargs`` dictionary, so it can be unpacked as
    ``args, kwargs = args_kwargs``.
    """

    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs

    def __iter__(self):
        yield from (self.args, self.kwargs)

    def load(self, device="cpu"):
        """Return a new ``ArgsKwargs`` in which every ``TensorLoader`` is replaced by ``loader.load(device)``."""

        def materialize(value):
            if isinstance(value, TensorLoader):
                return value.load(device)
            return value

        loaded_args = [materialize(arg) for arg in self.args]
        loaded_kwargs = {keyword: materialize(arg) for keyword, arg in self.kwargs.items()}
        return ArgsKwargs(*loaded_args, **loaded_kwargs)


402
403
404
# new v2 default
DEFAULT_SIZE = (17, 11)
# old v2 defaults
DEFAULT_SQUARE_SPATIAL_SIZE = 15
DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33)
DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9)
DEFAULT_SPATIAL_SIZES = (
    DEFAULT_LANDSCAPE_SPATIAL_SIZE,
    DEFAULT_PORTRAIT_SPATIAL_SIZE,
    DEFAULT_SQUARE_SPATIAL_SIZE,
)


def _parse_spatial_size(size, *, name="size"):
    """Normalize ``size`` to a tuple of two positive integers.

    Args:
        size: A positive integer ``n``, which becomes ``(n, n)``, or a sequence of two positive integers.
        name: Parameter name used in the error message.

    Raises:
        ValueError: If ``size`` is ``"random"``.
        pytest.UsageError: If ``size`` has any other unsupported value.
    """
    if size == "random":
        raise ValueError("This should never happen")

    if isinstance(size, int) and size > 0:
        return (size, size)

    if (
        isinstance(size, collections.abc.Sequence)
        and len(size) == 2
        and all(isinstance(length, int) and length > 0 for length in size)
    ):
        return tuple(size)

    # The first fragment ends with a space. Without it the message read "... two positive integers,but got ...".
    raise pytest.UsageError(
        f"'{name}' can either be `'random'`, a positive integer, or a sequence of two positive integers, "
        f"but got {size} instead."
    )


# Leading (batch) dimensions without any zero-sized entry.
VALID_EXTRA_DIMS = ((), (4,), (2, 3))
# Leading (batch) dimensions that contain a zero-sized entry.
DEGENERATE_BATCH_DIMS = ((0,), (5, 0), (0, 5))

# Valid dimensions first, degenerate ones last.
DEFAULT_EXTRA_DIMS = VALID_EXTRA_DIMS + DEGENERATE_BATCH_DIMS


def from_loader(loader_fn):
    """Turn a function that returns a loader into one that returns the loaded object.

    The returned function accepts an extra ``device`` keyword (default ``"cpu"``). It is removed from the keyword
    arguments before ``loader_fn`` is called and passed to the ``load`` method of the resulting loader instead.
    """

    def wrapper(*args, **kwargs):
        target_device = kwargs.pop("device", "cpu")
        return loader_fn(*args, **kwargs).load(target_device)

    return wrapper


def from_loaders(loaders_fn):
    """Turn a function that returns loaders into a generator function that yields the loaded objects.

    The returned generator function accepts an extra ``device`` keyword (default ``"cpu"``). It is removed from the
    keyword arguments before ``loaders_fn`` is called and passed to the ``load`` method of every loader instead.
    """

    def wrapper(*args, **kwargs):
        target_device = kwargs.pop("device", "cpu")
        yield from (loader.load(target_device) for loader in loaders_fn(*args, **kwargs))

    return wrapper


@dataclasses.dataclass
class TensorLoader:
    """Recipe for creating a tensor later: stores a factory plus the shape and dtype to call it with."""

    # Factory that is called as `fn(shape, dtype, device)` by `load`.
    fn: Callable[[Sequence[int], torch.dtype, Union[str, torch.device]], torch.Tensor]
    # Shape passed to `fn`.
    shape: Sequence[int]
    # Dtype passed to `fn`.
    dtype: torch.dtype

    def load(self, device):
        """Call ``fn`` with the stored shape and dtype and the given ``device`` and return its result."""
        return self.fn(self.shape, self.dtype, device)


@dataclasses.dataclass
class ImageLoader(TensorLoader):
    """Tensor loader for images that additionally forwards a memory format to the factory.

    ``spatial_size`` and ``num_channels`` are not constructor arguments; they are derived from ``shape``.
    """

    # Last two entries of `shape`; set in `__post_init__`.
    spatial_size: Tuple[int, int] = dataclasses.field(init=False)
    # Third-to-last entry of `shape`; set in `__post_init__`.
    num_channels: int = dataclasses.field(init=False)
    # Passed to `fn` as the `memory_format` keyword by `load`.
    memory_format: torch.memory_format = torch.contiguous_format

    def __post_init__(self):
        self.spatial_size = self.shape[-2:]
        self.num_channels = self.shape[-3]

    def load(self, device):
        """Call ``fn`` with the stored shape, dtype and memory format and the given ``device``."""
        return self.fn(self.shape, self.dtype, device, memory_format=self.memory_format)

481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496

# Number of channels for each supported color space name.
NUM_CHANNELS_MAP = dict(GRAY=1, GRAY_ALPHA=2, RGB=3, RGBA=4)


def get_num_channels(color_space):
    """Return the number of channels of ``color_space`` as listed in ``NUM_CHANNELS_MAP``.

    Raises:
        pytest.UsageError: If no (truthy) channel count is registered for ``color_space``.
    """
    count = NUM_CHANNELS_MAP.get(color_space)
    if count:
        return count
    raise pytest.UsageError(f"Can't determine the number of channels for color space {color_space}")


497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
def make_image(
    size=DEFAULT_SIZE,
    *,
    color_space="RGB",
    batch_dims=(),
    dtype=None,
    device="cpu",
    memory_format=torch.contiguous_format,
):
    """Create a random ``datapoints.Image`` of shape ``(*batch_dims, num_channels, *size)``.

    Args:
        size: Spatial size of the image.
        color_space: Key of ``NUM_CHANNELS_MAP`` that determines the number of channels.
        batch_dims: Leading dimensions in front of the channel dimension.
        dtype: Dtype of the image. Defaults to ``torch.uint8``.
        device: Device of the image.
        memory_format: Memory format passed to :func:`torch.testing.make_tensor`.

    For ``"GRAY_ALPHA"`` and ``"RGBA"`` the last channel is set to the maximum value of the dtype.
    """
    # Resolve the default before asking for the maximum value. Otherwise `get_max_value` would receive `None` and
    # the value range would not correspond to the dtype that is actually used to create the data.
    dtype = dtype or torch.uint8
    max_value = get_max_value(dtype)
    data = torch.testing.make_tensor(
        (*batch_dims, get_num_channels(color_space), *size),
        low=0,
        high=max_value,
        dtype=dtype,
        device=device,
        memory_format=memory_format,
    )
    if color_space in {"GRAY_ALPHA", "RGBA"}:
        data[..., -1, :, :] = max_value

    return datapoints.Image(data)


def make_image_tensor(*args, **kwargs):
    """Like ``make_image``, but return the result viewed as a plain :class:`torch.Tensor`."""
    image = make_image(*args, **kwargs)
    return image.as_subclass(torch.Tensor)


def make_image_pil(*args, **kwargs):
    """Like ``make_image``, but return the result converted with ``to_image_pil``."""
    image = make_image(*args, **kwargs)
    return to_image_pil(image)


529
def make_image_loader(
    size=DEFAULT_PORTRAIT_SPATIAL_SIZE,
    *,
    color_space="RGB",
    extra_dims=(),
    dtype=torch.float32,
    constant_alpha=True,
    memory_format=torch.contiguous_format,
):
    """Create an ``ImageLoader`` that builds a random image through ``make_image`` when loaded.

    Args:
        size: Spatial size, parsed by ``_parse_spatial_size``.
        color_space: Color space name that determines the number of channels.
        extra_dims: Leading dimensions in front of the channel dimension.
        dtype: Dtype of the image.
        constant_alpha: Has to be truthy; anything else raises ``ValueError``.
        memory_format: Memory format stored on the loader and handed to ``make_image`` on load.
    """
    if not constant_alpha:
        raise ValueError("This should never happen")

    size = _parse_spatial_size(size)
    num_channels = get_num_channels(color_space)

    def fn(shape, dtype, device, memory_format):
        # The channel entry of `shape` is ignored; `make_image` derives it from `color_space` again.
        *batch_dims, _, height, width = shape
        return make_image(
            (height, width),
            color_space=color_space,
            batch_dims=batch_dims,
            dtype=dtype,
            device=device,
            memory_format=memory_format,
        )

    return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype, memory_format=memory_format)
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576


def make_image_loaders(
    *,
    sizes=DEFAULT_SPATIAL_SIZES,
    color_spaces=(
        "GRAY",
        "GRAY_ALPHA",
        "RGB",
        "RGBA",
    ),
    extra_dims=DEFAULT_EXTRA_DIMS,
    dtypes=(torch.float32, torch.float64, torch.uint8),
    constant_alpha=True,
):
    """Yield one image loader for every combination of the given sizes, color spaces, extra dims and dtypes."""
    grid = combinations_grid(size=sizes, color_space=color_spaces, extra_dims=extra_dims, dtype=dtypes)
    yield from (make_image_loader(**params, constant_alpha=constant_alpha) for params in grid)


# Same parametrization as `make_image_loaders`, but yields the loaded images; accepts an extra `device` keyword.
make_images = from_loaders(make_image_loaders)


577
def make_image_loader_for_interpolation(
    size=(233, 147), *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format
):
    """Create an ``ImageLoader`` that loads a resized and converted version of the Grace Hopper test asset.

    Args:
        size: Spatial size the asset is resized to, parsed by ``_parse_spatial_size``.
        color_space: One of ``"GRAY"``, ``"GRAY_ALPHA"``, ``"RGB"`` or ``"RGBA"``.
        dtype: Dtype the image is converted to with ``to_dtype_image_tensor(..., scale=True)``.
        memory_format: Memory format stored on the loader.
    """
    size = _parse_spatial_size(size)
    num_channels = get_num_channels(color_space)

    def fn(shape, dtype, device, memory_format):
        height, width = shape[-2:]

        image_pil = (
            PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg")
            .resize((width, height))
            .convert(
                {
                    "GRAY": "L",
                    "GRAY_ALPHA": "LA",
                    "RGB": "RGB",
                    "RGBA": "RGBA",
                }[color_space]
            )
        )

        image_tensor = to_image_tensor(image_pil)
        # NOTE(review): only the contiguous case requests a memory format explicitly. For any other value the tensor
        # keeps whatever layout `to_image_tensor` produced -- presumably already channels-last-like; confirm.
        if memory_format == torch.contiguous_format:
            image_tensor = image_tensor.to(device=device, memory_format=memory_format, copy=True)
        else:
            image_tensor = image_tensor.to(device=device)
        image_tensor = to_dtype_image_tensor(image_tensor, dtype=dtype, scale=True)

        return datapoints.Image(image_tensor)

    return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype, memory_format=memory_format)
609
610
611
612
613
614


def make_image_loaders_for_interpolation(
    sizes=((233, 147),),
    color_spaces=("RGB",),
    dtypes=(torch.uint8,),
    memory_formats=(torch.contiguous_format, torch.channels_last),
):
    """Yield one interpolation image loader for every combination of the given parameters."""
    for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes, memory_format=memory_formats):
        yield make_image_loader_for_interpolation(**params)


@dataclasses.dataclass
class BoundingBoxLoader(TensorLoader):
    """Tensor loader for bounding boxes that also records their format and spatial size."""

    # Coordinate format of the boxes.
    format: datapoints.BoundingBoxFormat
    # Size of the reference object the boxes belong to -- presumably (height, width); confirm against the callers.
    spatial_size: Tuple[int, int]


627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
def make_bounding_box(
    size=None,
    *,
    format=datapoints.BoundingBoxFormat.XYXY,
    spatial_size=None,
    batch_dims=(),
    dtype=None,
    device="cpu",
):
    """
    size: Size of the actual bounding box, i.e.
        - (box[3] - box[1], box[2] - box[0]) for XYXY
        - (H, W) for XYWH and CXCYWH
    spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on
        returned datapoints.BoundingBox

    To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker
    functions, e.g.

    .. code::

        image = make_image(size=size)
        bounding_box = make_bounding_box(spatial_size=size)
        assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image)

    For convenience, if both size and spatial_size are omitted, spatial_size defaults to the same value as size for all
    other maker functions, e.g.

    .. code::

        image = make_image()
        bounding_box = make_bounding_box()
        assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image)

    If only size is given, spatial_size is size plus a random margin between 0 and 9 per dimension. If any entry of
    batch_dims is 0, an uninitialized bounding box of shape (*batch_dims, 4) is returned. format may also be passed
    as the name of a datapoints.BoundingBoxFormat member. dtype defaults to torch.float32.
    """

    def sample_position(values, max_value):
        # We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high.
        # However, if we have batch_dims, we need tensors as limits.
        return torch.stack([torch.randint(max_value - v, ()) for v in values.flatten().tolist()]).reshape(values.shape)

    if isinstance(format, str):
        format = datapoints.BoundingBoxFormat[format]

    if spatial_size is None:
        if size is None:
            spatial_size = DEFAULT_SIZE
        else:
            height, width = size
            height_margin, width_margin = torch.randint(10, (2,)).tolist()
            spatial_size = (height + height_margin, width + width_margin)

    dtype = dtype or torch.float32

    if any(dim == 0 for dim in batch_dims):
        return datapoints.BoundingBox(
            torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
        )

    if size is None:
        h, w = [torch.randint(1, s, batch_dims) for s in spatial_size]
    else:
        h, w = [torch.full(batch_dims, s, dtype=torch.int) for s in size]

    # Top-left corner of each box.
    y = sample_position(h, spatial_size[0])
    x = sample_position(w, spatial_size[1])

    if format is datapoints.BoundingBoxFormat.XYWH:
        parts = (x, y, w, h)
    elif format is datapoints.BoundingBoxFormat.XYXY:
        x1, y1 = x, y
        x2 = x1 + w
        y2 = y1 + h
        parts = (x1, y1, x2, y2)
    elif format is datapoints.BoundingBoxFormat.CXCYWH:
        cx = x + w / 2
        cy = y + h / 2
        parts = (cx, cy, w, h)
    else:
        raise ValueError(f"Format {format} is not supported")

    return datapoints.BoundingBox(
        torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
    )


712
def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32):
    """Create a ``BoundingBoxLoader`` that builds random bounding boxes through ``make_bounding_box`` when loaded.

    Args:
        extra_dims: Leading dimensions in front of the coordinate dimension.
        format: A ``datapoints.BoundingBoxFormat`` member or the name of one.
        spatial_size: Size of the reference object, parsed by ``_parse_spatial_size``.
        dtype: Dtype of the bounding boxes.
    """
    if isinstance(format, str):
        format = datapoints.BoundingBoxFormat[format]

    spatial_size = _parse_spatial_size(spatial_size, name="spatial_size")

    def fn(shape, dtype, device):
        *batch_dims, num_coordinates = shape
        if num_coordinates != 4:
            raise pytest.UsageError(
                f"The last dimension of a bounding box shape has to be 4, but got shape {tuple(shape)} instead."
            )

        return make_bounding_box(
            format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device
        )

    return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)


def make_bounding_box_loaders(
    *,
    extra_dims=DEFAULT_EXTRA_DIMS,
    formats=tuple(datapoints.BoundingBoxFormat),
    spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE,
    dtypes=(torch.float32, torch.float64, torch.int64),
):
    """Yield one bounding box loader for every combination of extra dims, formats and dtypes.

    All loaders share the same ``spatial_size``.
    """
    for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes):
        yield make_bounding_box_loader(**params, spatial_size=spatial_size)


# Same parametrization as `make_bounding_box_loaders`, but yields the loaded boxes; accepts an extra `device` keyword.
make_bounding_boxes = from_loaders(make_bounding_box_loaders)


class MaskLoader(TensorLoader):
    """Tensor loader for masks. Adds nothing to ``TensorLoader`` besides a distinct type."""


748
749
750
751
752
753
754
755
756
757
758
759
760
761
def make_detection_mask(size=DEFAULT_SIZE, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"):
    """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks

    ``dtype`` defaults to ``torch.bool``. The values are drawn by ``torch.testing.make_tensor`` with ``low=0`` and
    ``high=2``.
    """
    shape = (*batch_dims, num_objects, *size)
    mask_dtype = dtype or torch.bool
    data = torch.testing.make_tensor(shape, low=0, high=2, dtype=mask_dtype, device=device)
    return datapoints.Mask(data)


def make_detection_mask_loader(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_objects=5, extra_dims=(), dtype=torch.uint8):
    """Create a ``MaskLoader`` that builds a detection mask through ``make_detection_mask`` when loaded."""
    # This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects
    size = _parse_spatial_size(size)

    def fn(shape, dtype, device):
        *batch_dims, num_objects, height, width = shape
        return make_detection_mask(
            (height, width), num_objects=num_objects, batch_dims=batch_dims, dtype=dtype, device=device
        )

    return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype)


def make_detection_mask_loaders(
    sizes=DEFAULT_SPATIAL_SIZES,
    num_objects=(1, 0, 5),
    extra_dims=DEFAULT_EXTRA_DIMS,
    dtypes=(torch.uint8,),
):
    """Yield one detection mask loader for every combination of sizes, object counts, extra dims and dtypes."""
    for params in combinations_grid(size=sizes, num_objects=num_objects, extra_dims=extra_dims, dtype=dtypes):
        yield make_detection_mask_loader(**params)


# Same parametrization as `make_detection_mask_loaders`, but yields the loaded masks; accepts an extra `device` keyword.
make_detection_masks = from_loaders(make_detection_mask_loaders)


787
788
789
790
791
792
793
794
795
796
797
def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"):
    """Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value

    ``dtype`` defaults to ``torch.uint8``. The values are drawn by ``torch.testing.make_tensor`` with ``low=0`` and
    ``high=num_categories``.
    """
    shape = (*batch_dims, *size)
    mask_dtype = dtype or torch.uint8
    data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=mask_dtype, device=device)
    return datapoints.Mask(data)
798
799


800
801
802
803
804
def make_segmentation_mask_loader(
    size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_categories=10, extra_dims=(), dtype=torch.uint8
):
    """Create a ``MaskLoader`` that builds a segmentation mask through ``make_segmentation_mask`` when loaded."""
    # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values
    spatial_size = _parse_spatial_size(size)

    def fn(shape, dtype, device):
        *batch_dims, height, width = shape
        return make_segmentation_mask(
            (height, width), num_categories=num_categories, batch_dims=batch_dims, dtype=dtype, device=device
        )

    return MaskLoader(fn, shape=(*extra_dims, *spatial_size), dtype=dtype)
813
814
815
816
817


def make_segmentation_mask_loaders(
    *,
    sizes=DEFAULT_SPATIAL_SIZES,
    num_categories=(1, 2, 10),
    extra_dims=DEFAULT_EXTRA_DIMS,
    dtypes=(torch.uint8,),
):
    """Yield one segmentation mask loader for every combination of sizes, category counts, extra dims and dtypes."""
    for params in combinations_grid(size=sizes, num_categories=num_categories, extra_dims=extra_dims, dtype=dtypes):
        yield make_segmentation_mask_loader(**params)


# Same parametrization as `make_segmentation_mask_loaders`, but yields the loaded masks; accepts an extra `device`
# keyword.
make_segmentation_masks = from_loaders(make_segmentation_mask_loaders)


def make_mask_loaders(
    *,
    sizes=DEFAULT_SPATIAL_SIZES,
    num_objects=(1, 0, 5),
    num_categories=(1, 2, 10),
    extra_dims=DEFAULT_EXTRA_DIMS,
    dtypes=(torch.uint8,),
):
    """Yield all detection mask loaders followed by all segmentation mask loaders for the given parameters."""
    yield from make_detection_mask_loaders(sizes=sizes, num_objects=num_objects, extra_dims=extra_dims, dtypes=dtypes)
    yield from make_segmentation_mask_loaders(
        sizes=sizes, num_categories=num_categories, extra_dims=extra_dims, dtypes=dtypes
    )


# Same parametrization as `make_mask_loaders`, but yields the loaded masks; accepts an extra `device` keyword.
make_masks = from_loaders(make_mask_loaders)


class VideoLoader(ImageLoader):
    """Tensor loader for videos. Adds nothing to ``ImageLoader`` besides a distinct type."""


850
851
852
853
def make_video(size=DEFAULT_SIZE, *, num_frames=3, batch_dims=(), **kwargs):
    """Create a random ``datapoints.Video`` by making an image with ``num_frames`` appended to ``batch_dims``.

    All other keyword arguments are forwarded to ``make_image``.
    """
    frames = make_image(size, batch_dims=(*batch_dims, num_frames), **kwargs)
    return datapoints.Video(frames)


854
def make_video_loader(
    size=DEFAULT_PORTRAIT_SPATIAL_SIZE,
    *,
    color_space="RGB",
    num_frames=3,
    extra_dims=(),
    dtype=torch.uint8,
):
    """Create a ``VideoLoader`` that builds a random video through ``make_video`` when loaded.

    Args:
        size: Spatial size, parsed by ``_parse_spatial_size``.
        color_space: Color space name that determines the number of channels.
        num_frames: Number of frames.
        extra_dims: Leading dimensions in front of the frame dimension.
        dtype: Dtype of the video.
    """
    size = _parse_spatial_size(size)

    def fn(shape, dtype, device, memory_format):
        # The channel entry of `shape` is ignored; it is derived from `color_space` again further down the call chain.
        *batch_dims, num_frames, _, height, width = shape
        return make_video(
            (height, width),
            num_frames=num_frames,
            batch_dims=batch_dims,
            color_space=color_space,
            dtype=dtype,
            device=device,
            memory_format=memory_format,
        )

    return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype)


def make_video_loaders(
    *,
    sizes=DEFAULT_SPATIAL_SIZES,
    color_spaces=(
        "GRAY",
        "RGB",
    ),
    num_frames=(1, 0, 3),
    extra_dims=DEFAULT_EXTRA_DIMS,
    dtypes=(torch.uint8, torch.float32, torch.float64),
):
    """Yield one video loader for every combination of sizes, color spaces, frame counts, extra dims and dtypes."""
    for params in combinations_grid(
        size=sizes, color_space=color_spaces, num_frames=num_frames, extra_dims=extra_dims, dtype=dtypes
    ):
        yield make_video_loader(**params)


# Same parametrization as `make_video_loaders`, but yields the loaded videos; accepts an extra `device` keyword.
make_videos = from_loaders(make_video_loaders)


class TestMark:
    """Bundle a pytest mark with the test it targets and an optional condition for applying it.

    Args:
        test_id: Tuple of test class name and test function name that identifies the test the mark is applied to.
            If there is no test class, i.e. a standalone test function, use `None`.
        mark: `pytest.mark.*` to apply, e.g. `pytest.mark.skip` or `pytest.mark.xfail`.
        condition: Callable, that will be passed an `ArgsKwargs` and should return a boolean to indicate if the
            mark will be applied. If omitted, defaults to always apply.
    """

    # The class name matches pytest's default `Test*` collection pattern. Since the class defines `__init__`,
    # pytest cannot collect it and emits a collection warning in every test module that imports it. Opting out
    # explicitly keeps the test summary clean.
    __test__ = False

    def __init__(
        self,
        test_id,
        mark,
        *,
        condition=None,
    ):
        self.test_id = test_id
        self.mark = mark
        # Without a condition the mark applies unconditionally.
        self.condition = condition or (lambda args_kwargs: True)


def mark_framework_limitation(test_id, reason, condition=None):
    """Create a skip `TestMark` for a test that the test framework itself cannot handle.

    This is the single entry point for skip marks that only exist because the framework cannot deal with a kernel
    in general or with a specific parameter combination. From time to time the `mark.skip` below can be switched
    to `mark.xfail` to check whether the skip is still justified. `mark.xfail` is not used permanently, because it
    runs the test until an error happens, which wastes CI resources most of the time.

    Args:
        test_id: Identifier of the test the mark applies to, see `TestMark`.
        reason: Reason that is attached to the skip mark.
        condition: Optional condition, see `TestMark`.

    Returns:
        A `TestMark` carrying `pytest.mark.skip(reason=reason)`.
    """
    skip_mark = pytest.mark.skip(reason=reason)
    return TestMark(test_id, skip_mark, condition=condition)


class InfoBase:
    """Base container for the identifier, test marks, and closeness settings of an info object.

    Args:
        id: Identifier of the info that shows up in the parametrization.
        test_marks: Test markers that will be (conditionally) applied to an `ArgsKwargs` parametrization. See the
            `TestMark` class for details.
        closeness_kwargs: Additional parameters, e.g. `rtol=1e-3`, passed to `assert_close`. Keys are a 3-tuple of
            `test_id` (see `TestMark`), the dtype, and the device.
    """

    def __init__(
        self,
        *,
        id,
        test_marks=None,
        closeness_kwargs=None,
    ):
        self.id = id

        self.test_marks = test_marks or []
        # Group the marks by the test they target, so that `get_marks` only needs a single lookup.
        marks_by_test = {}
        for entry in self.test_marks:
            marks_by_test.setdefault(entry.test_id, []).append(entry)
        self._test_marks_map = marks_by_test

        self.closeness_kwargs = closeness_kwargs or dict()

    def get_marks(self, test_id, args_kwargs):
        """Return the marks registered for ``test_id`` whose condition accepts ``args_kwargs``."""
        registered = self._test_marks_map.get(test_id, [])
        return [entry.mark for entry in registered if entry.condition(args_kwargs)]

    def get_closeness_kwargs(self, test_id, *, dtype, device):
        """Look up the closeness parameters stored for ``(test_id, dtype, device)``.

        Args:
            test_id: 2-tuple of test class name and test function name.
            dtype: Dtype part of the lookup key.
            device: Device part of the lookup key. A `torch.device` is reduced to its ``type`` first.

        Returns:
            The stored parameters or an empty dictionary if there is no entry for the key.

        Raises:
            pytest.UsageError: If ``test_id`` is not a tuple of length two.
        """
        is_valid_id = isinstance(test_id, tuple) and len(test_id) == 2
        if not is_valid_id:
            msg = "`test_id` should be a `Tuple[Optional[str], str]` denoting the test class and function name"
            # A callable here most likely is the fixture function itself rather than its value.
            msg += (
                ". Did you forget to add the `test_id` fixture to parameters of the test?"
                if callable(test_id)
                else f", but got {test_id} instead."
            )
            raise pytest.UsageError(msg)

        device_key = device.type if isinstance(device, torch.device) else device
        return self.closeness_kwargs.get((test_id, dtype, device_key), dict())
966
967
968
969


def assert_run_python_script(source_code):
    """Utility to check assertions in an independent Python subprocess.

    The script provided in the source code should return 0 and not print
    anything on stderr or stdout. Modified from scikit-learn test utils.

    Args:
        source_code (str): The Python source code to execute.

    Raises:
        RuntimeError: If the script exits with a non-zero return code. The combined stdout / stderr output of the
            script is included in the message.
        AssertionError: If the script exits with return code 0, but printed anything to stdout or stderr.
    """
    with get_tmp_dir() as root:
        path = pathlib.Path(root) / "main.py"
        # The interpreter decodes source files as UTF-8 by default. Writing with the platform default encoding
        # would corrupt non-ASCII source code on systems with a different locale encoding.
        path.write_text(source_code, encoding="utf-8")

        try:
            # stderr is merged into stdout so that a single check covers both streams.
            out = check_output([sys.executable, str(path)], stderr=STDOUT)
        except CalledProcessError as e:
            # Undecodable bytes are replaced rather than letting a `UnicodeDecodeError` hide the script failure.
            script_output = e.output.decode(errors="replace")
            raise RuntimeError(f"script errored with output:\n{script_output}") from e
        if out != b"":
            raise AssertionError(out.decode(errors="replace"))
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007


@contextlib.contextmanager
def assert_no_warnings():
    """Context manager that turns every warning emitted inside of it into an exception."""
    # Despite its name, `catch_warnings` does **not** catch any warnings. It only scopes the warning filters: all
    # changes made to the filters inside the context are reset upon exit.
    filter_scope = warnings.catch_warnings()
    with filter_scope:
        warnings.simplefilter(action="error")
        yield


@contextlib.contextmanager
def ignore_jit_no_profile_information_warning():
    """Context manager that silences the "does not have profile information" warning of scripted objects."""
    # Calling a scripted object often triggers a warning like
    # `UserWarning: operator() profile_node %$INT1 : int[] = prim::profile_ivalue($INT2) does not have profile information`
    # with varying `INT1` and `INT2`. They are uninteresting for us and only clutter the test summary, so they are
    # ignored here.
    message_pattern = re.escape("operator() profile_node %")
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message=message_pattern, category=UserWarning)
        yield